# --- Seasonal decomposition of US total vehicle sales (not seasonally adjusted) ---
rm(list=ls()) # clear workspace
cat("\014")   # clear console

ticker    <- "FRED/TOTALNSA"  # Quandl code: total US vehicle sales, NSA
startdate <- "1980-01-01"

library(Quandl)
# The API key lives outside the repo in a one-column CSV (column "Key").
quandl_api_key <- read.csv("../data/quandlkey.csv", stringsAsFactors = FALSE)$Key
Quandl.api_key(quandl_api_key)

# Pull the series as a ts object so decompose() can use its frequency.
VehicleNSA <- Quandl(ticker, start_date = startdate, type = "ts")
plot(VehicleNSA)

# Classical decomposition into trend / seasonal / random components.
decomp <- decompose(VehicleNSA)
plot(decomp)
# Build a seasonally adjusted vehicle-sales series by subtracting the
# estimated seasonal component from the observed (NSA) series, then plot both.
rm(list=ls()) # clear workspace
cat("\014") # clear console
library(fredr)
# NOTE(review): a FRED key is read and set here, but the data pull below goes
# through Quandl, which requires Quandl.api_key() (set in the earlier
# quandlkey.csv section). Confirm the Quandl key is set in this session,
# otherwise the Quandl() call may fail or be rate-limited.
fred_api_key<-read.csv("./fredkey.csv",stringsAsFactors=FALSE)
fredr_set_key(fred_api_key$Key)
ticker="FRED/TOTALNSA"
startdate<-"1980-01-01"
library(Quandl)
VehicleNSA=Quandl(ticker,start_date=startdate,type="ts")
plot(VehicleNSA)
decomp<-decompose(VehicleNSA)
# Seasonally adjusted series = observed series minus the seasonal component.
VehicleSA<-decomp$x-decomp$seasonal
# as.xts() comes from the xts package -- presumably attached via Quandl's
# dependencies; verify if running this section standalone.
Vehicle<-cbind(as.xts(VehicleNSA),as.xts(VehicleSA))
colnames(Vehicle)<-c("NSA","SA")
plot(Vehicle,legend.loc = "topleft")
# Smooth the NSA vehicle-sales series with a 12-month simple moving average
# (an alternative way to strip out monthly seasonality) and plot both series.
rm(list=ls()) # clear workspace
cat("\014") # clear console
library(fredr)
# NOTE(review): as in the previous section, a FRED key is set but the pull is
# via Quandl -- confirm Quandl.api_key() has been called in this session.
fred_api_key<-read.csv("./fredkey.csv",stringsAsFactors=FALSE)
fredr_set_key(fred_api_key$Key)
ticker="FRED/TOTALNSA"
startdate<-"1980-01-01"
library(Quandl)
VehicleNSA=Quandl(ticker,start_date=startdate,type="ts")
library(TTR)
# 12-period SMA: with monthly data this averages over a full year, so the
# seasonal pattern largely cancels. The first 11 values are NA by construction.
Smoothed<-SMA(VehicleNSA,n=12)
toplot<-cbind(as.xts(VehicleNSA),as.xts(Smoothed))
colnames(toplot)<-c("NSA","Smoothed")
plot(toplot,legend.loc="topleft")
# Simple OLS: regress DAX daily log returns on CAC log returns.
rm(list=ls()) # clear workspace
cat("\014") # clear console
library(PortfolioAnalytics)
data("EuStockMarkets") # DAX/SMI/CAC/FTSE daily closes (ships with base R's datasets)
Prices<-as.data.frame(EuStockMarkets)
nobs <- nrow(Prices) # renamed from `T`, which shadows the TRUE shorthand
# Daily log returns. (nobs-1) is parenthesized: the original `1:T-1` parsed as
# (1:T)-1 = 0:(T-1) and only worked because R silently drops a zero index.
Returns<-log(Prices[2:nobs,]/Prices[1:(nobs-1),])
reg1<-lm(DAX~CAC,Returns) #Run Regression
print(summary(reg1))
##
## Call:
## lm(formula = DAX ~ CAC, data = Returns)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.044676 -0.004302 -0.000076 0.004093 0.040100
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.0003523 0.0001623 2.17 0.0301 *
## CAC 0.6858248 0.0147070 46.63 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.006993 on 1857 degrees of freedom
## Multiple R-squared: 0.5394, Adjusted R-squared: 0.5391
## F-statistic: 2175 on 1 and 1857 DF, p-value: < 2.2e-16
# Multiple OLS: regress DAX log returns on both CAC and FTSE log returns.
rm(list=ls()) # clear workspace
cat("\014") # clear console
library(PortfolioAnalytics)
data("EuStockMarkets")
Prices<-as.data.frame(EuStockMarkets)
nobs <- nrow(Prices) # was `T` (shadows TRUE)
Returns<-log(Prices[2:nobs,]/Prices[1:(nobs-1),]) # daily log returns; (nobs-1) parenthesized
reg1<-lm(DAX~CAC+FTSE,Returns) #Run Regression
print(summary(reg1))
##
## Call:
## lm(formula = DAX ~ CAC + FTSE, data = Returns)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.046142 -0.003848 -0.000009 0.003987 0.033370
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.0002694 0.0001542 1.747 0.0807 .
## CAC 0.5152838 0.0183380 28.099 <2e-16 ***
## FTSE 0.3644971 0.0254199 14.339 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.006637 on 1856 degrees of freedom
## Multiple R-squared: 0.5853, Adjusted R-squared: 0.5849
## F-statistic: 1310 on 2 and 1856 DF, p-value: < 2.2e-16
# Interaction terms in lm formulas: `:` vs `*`.
rm(list=ls()) # clear workspace
cat("\014") # clear console
library(PortfolioAnalytics)
data("EuStockMarkets")
Prices<-as.data.frame(EuStockMarkets)
nobs <- nrow(Prices) # was `T` (shadows TRUE)
Returns<-log(Prices[2:nobs,]/Prices[1:(nobs-1),]) # daily log returns; (nobs-1) parenthesized
# `CAC:FTSE` fits ONLY the interaction term (no main effects).
reg1<-lm(DAX~CAC:FTSE,Returns) #Run Regression
# `CAC*FTSE` expands to CAC + FTSE + CAC:FTSE (main effects plus interaction).
reg2<-lm(DAX~CAC*FTSE,Returns) # Run Regression
# Regression with a seasonal dummy: regress MSFT daily log returns on GE
# returns, a Q1 indicator, and their interaction (does the MSFT-GE relation
# change in the first calendar quarter?).
rm(list=ls()) # clear workspace
cat("\014") # clear console
library("tidyquant")
Tickers = c('MSFT','GE') # asset ticker
start_date = '2017-01-01' # data start date
end_date = '2021-03-01' # data end date
data_src = 'yahoo' # data source
# getSymbols() creates one xts object per ticker in the global environment.
getSymbols(Tickers,from = start_date,to = end_date, src=data_src)
## [1] "MSFT" "GE"
# Daily log returns from adjusted closes, merged column-wise across tickers.
Returns = do.call(merge,lapply(Tickers, function(x)
periodReturn(Ad(get(x)),period='daily',type='log')))
# Drop the first row (return undefined for the first observation) and any NA rows.
Returns = na.omit(Returns[-1,])
colnames(Returns)<-Tickers
library(lubridate)
# Q1 dummy: 1 when the observation's calendar quarter is 1, else 0.
Returns$Q1<-ifelse(quarter(index(Returns))==1,1,0)
# GE*Q1 expands to GE + Q1 + GE:Q1, so the model allows both the intercept and
# the GE slope to shift in Q1.
reg1=lm(MSFT~GE*Q1,data=Returns)
print(summary(reg1))
##
## Call:
## lm(formula = MSFT ~ GE * Q1, data = Returns)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.088806 -0.007493 0.000016 0.008366 0.095418
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.460e-03 5.973e-04 2.445 0.0147 *
## GE 9.620e-02 2.342e-02 4.108 4.3e-05 ***
## Q1 -2.437e-05 1.147e-03 -0.021 0.9831
## GE:Q1 3.423e-01 3.848e-02 8.894 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.01647 on 1040 degrees of freedom
## Multiple R-squared: 0.1766, Adjusted R-squared: 0.1742
## F-statistic: 74.36 on 3 and 1040 DF, p-value: < 2.2e-16
# Scatter plot of DAX vs CAC returns with the fitted OLS line overlaid.
rm(list=ls()) # clear workspace
cat("\014") # clear console
library(PortfolioAnalytics)
data("EuStockMarkets")
Prices<-as.data.frame(EuStockMarkets)
nobs <- nrow(Prices) # was `T` (shadows TRUE)
Returns<-log(Prices[2:nobs,]/Prices[1:(nobs-1),]) # daily log returns; (nobs-1) parenthesized
reg1<-lm(DAX~CAC,Returns) #Run Regression
plot(DAX~CAC,data=Returns)
abline(reg1) # overlay fitted regression line on the scatter
# Standard lm diagnostic plots (residuals vs fitted, Q-Q, scale-location,
# Cook's distance, residuals vs leverage, Cook's vs leverage).
rm(list=ls()) # clear workspace
cat("\014") # clear console
library(PortfolioAnalytics)
data("EuStockMarkets")
Prices<-as.data.frame(EuStockMarkets)
nobs <- nrow(Prices) # was `T` (shadows TRUE)
Returns<-log(Prices[2:nobs,]/Prices[1:(nobs-1),]) # daily log returns; (nobs-1) parenthesized
reg1<-lm(DAX~CAC,Returns) #Run Regression
#par(mfrow=c(3,2))
# plot.lm supports which = 1..6; loop instead of six copy-pasted calls.
for (k in 1:6) {
  plot(reg1, which = k)
}
# Extract estimation results: point estimates, confidence intervals, and the
# coefficient covariance matrix.
rm(list=ls()) # clear workspace
cat("\014") # clear console
library(PortfolioAnalytics)
data("EuStockMarkets")
Prices<-as.data.frame(EuStockMarkets)
nobs <- nrow(Prices) # was `T` (shadows TRUE)
Returns<-log(Prices[2:nobs,]/Prices[1:(nobs-1),]) # daily log returns; (nobs-1) parenthesized
reg1<-lm(DAX~CAC,Returns) #Run Regression
print('Coefficients')
## [1] "Coefficients"
coef(reg1)
## (Intercept) CAC
## 0.0003522993 0.6858247625
alpha=coef(reg1)[1] # intercept
beta=coef(reg1)[2]  # slope on CAC
print('Confidence intervals')
## [1] "Confidence intervals"
confint(reg1) # 95% CIs by default
## 2.5 % 97.5 %
## (Intercept) 3.396064e-05 0.000670638
## CAC 6.569808e-01 0.714668761
print('Covariance')
## [1] "Covariance"
vcov(reg1) # variance-covariance matrix of the coefficient estimates
## (Intercept) CAC
## (Intercept) 2.634610e-08 -9.453302e-08
## CAC -9.453302e-08 2.162960e-04
# Two equivalent ways to obtain regression residuals: resid() and
# actual-minus-fitted.
rm(list=ls()) # clear workspace
cat("\014") # clear console
library(PortfolioAnalytics)
data("EuStockMarkets")
Prices<-as.data.frame(EuStockMarkets)
nobs <- nrow(Prices) # was `T` (shadows TRUE)
Returns<-log(Prices[2:nobs,]/Prices[1:(nobs-1),]) # daily log returns; (nobs-1) parenthesized
reg1<-lm(DAX~CAC,Returns) #Run Regression
Errors<-resid(reg1)
plot(Errors)
#or compute residuals manually: observed DAX minus fitted values
Errors2<-Returns$DAX-reg1$fitted.values
head(cbind(Errors,Errors2)) # both columns should be identical
## Errors Errors2
## 2 -0.0009971609 -0.0009971609
## 3 0.0080783190 0.0080783190
## 4 0.0126150011 0.0126150011
## 5 -0.0081269243 -0.0081269243
## 6 -0.0015174787 -0.0015174787
## 7 0.0040407495 0.0040407495
# Two equivalent ways to obtain fitted values: fitted() and applying the
# estimated coefficients to the regressor by hand.
rm(list=ls()) # clear workspace
cat("\014") # clear console
library(PortfolioAnalytics)
data("EuStockMarkets")
Prices<-as.data.frame(EuStockMarkets)
nobs <- nrow(Prices) # was `T` (shadows TRUE)
Returns<-log(Prices[2:nobs,]/Prices[1:(nobs-1),]) # daily log returns; (nobs-1) parenthesized
reg1<-lm(DAX~CAC,Returns) #Run Regression
Estimates<-fitted(reg1)
#or compute manually: alpha + beta * CAC
Estimates2<-coef(reg1)[1]+coef(reg1)[2]*Returns$CAC
head(cbind(Estimates,Estimates2)) # both columns should be identical
## Estimates Estimates2
## 2 -0.008329389 -0.008329389
## 3 -0.012500494 -0.012500494
## 4 -0.003611207 -0.003611207
## 5 0.006348707 0.006348707
## 6 -0.003159233 -0.003159233
## 7 0.008386293 0.008386293
# Out-of-sample prediction: predicted DAX returns (with standard errors) for a
# grid of hypothetical CAC returns from -5% to +5%.
rm(list=ls()) # clear workspace
cat("\014") # clear console
library(PortfolioAnalytics)
data("EuStockMarkets")
Prices<-as.data.frame(EuStockMarkets)
nobs <- nrow(Prices) # was `T` (shadows TRUE)
Returns<-log(Prices[2:nobs,]/Prices[1:(nobs-1),]) # daily log returns; (nobs-1) parenthesized
# newdata must be a data.frame whose column name matches the regressor.
newCAC<-data.frame(CAC=seq(-.05,.05,.01))
PredictedModel<-predict(lm(DAX~CAC,Returns),newCAC,se.fit=TRUE)
cbind(newCAC,PredictedModel$fit,PredictedModel$se.fit)
## CAC PredictedModel$fit PredictedModel$se.fit
## 1 -0.05 -0.0339389388 0.0007593019
## 2 -0.04 -0.0270806912 0.0006164270
## 3 -0.03 -0.0202224436 0.0004761139
## 4 -0.02 -0.0133641959 0.0003415345
## 5 -0.01 -0.0065059483 0.0002233078
## 6 0.00 0.0003522993 0.0001623148
## 7 0.01 0.0072105469 0.0002146743
## 8 0.02 0.0140687946 0.0003302774
## 9 0.03 0.0209270422 0.0004640479
## 10 0.04 0.0277852898 0.0006040340
## 11 0.05 0.0346435374 0.0007467481
# Manual two-sided t-test on the intercept (coefficients row 1), reproducing
# the p-value reported in summary().
rm(list=ls()) # clear workspace
cat("\014") # clear console
library(PortfolioAnalytics)
data("EuStockMarkets")
Prices<-as.data.frame(EuStockMarkets)
nobs <- nrow(Prices) # was `T` (shadows TRUE)
Returns<-log(Prices[2:nobs,]/Prices[1:(nobs-1),]) # daily log returns; (nobs-1) parenthesized
reg1<-lm(DAX~CAC,Returns) #Run Regression
print(summary(reg1))
##
## Call:
## lm(formula = DAX ~ CAC, data = Returns)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.044676 -0.004302 -0.000076 0.004093 0.040100
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.0003523 0.0001623 2.17 0.0301 *
## CAC 0.6858248 0.0147070 46.63 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.006993 on 1857 degrees of freedom
## Multiple R-squared: 0.5394, Adjusted R-squared: 0.5391
## F-statistic: 2175 on 1 and 1857 DF, p-value: < 2.2e-16
h0=0 # set to your hypothesized value
# t = (estimate - h0) / std.error for the intercept row ([1,]) of the table.
tstat = (summary(reg1)$coefficients[1,1]-h0)/summary(reg1)$coefficients[1,2]
# Two-sided p-value; use df.residual explicitly (`$df` relied on partial matching).
pvalue = pt(q=abs(tstat),df = reg1$df.residual,lower.tail = FALSE)*2
pvalue
## [1] 0.03009767
# Manual one-sided t-tests on the INTERCEPT (coefficients row 1). The original
# labels said "slope", but [1,1]/[1,2] index the intercept row -- t = 2.17 in
# the summary below, which is what the p-values 0.985 / 0.015 correspond to.
rm(list=ls()) # clear workspace
cat("\014") # clear console
library(PortfolioAnalytics)
data("EuStockMarkets")
Prices<-as.data.frame(EuStockMarkets)
nobs <- nrow(Prices) # was `T` (shadows TRUE)
Returns<-log(Prices[2:nobs,]/Prices[1:(nobs-1),]) # daily log returns; (nobs-1) parenthesized
reg1<-lm(DAX~CAC,Returns) #Run Regression
print(summary(reg1))
##
## Call:
## lm(formula = DAX ~ CAC, data = Returns)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.044676 -0.004302 -0.000076 0.004093 0.040100
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.0003523 0.0001623 2.17 0.0301 *
## CAC 0.6858248 0.0147070 46.63 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.006993 on 1857 degrees of freedom
## Multiple R-squared: 0.5394, Adjusted R-squared: 0.5391
## F-statistic: 2175 on 1 and 1857 DF, p-value: < 2.2e-16
print('H0: intercept > 0')
## [1] "H0: intercept > 0"
h0=0 # set to your hypothesized value
tstat = (summary(reg1)$coefficients[1,1]-h0)/summary(reg1)$coefficients[1,2]
# Lower-tail p-value: evidence against H0 when tstat is very negative.
pvalue = pt(q=tstat,df = reg1$df.residual,lower.tail = TRUE)
pvalue
## [1] 0.9849512
print('H0: intercept < 0')
## [1] "H0: intercept < 0"
h0=0 # set to your hypothesized value
tstat = (summary(reg1)$coefficients[1,1]-h0)/summary(reg1)$coefficients[1,2]
# Upper-tail p-value, computed two equivalent ways.
pvalue = 1-pt(q=tstat,df = reg1$df.residual,lower.tail = TRUE)
pvalue
## [1] 0.01504883
pvalue = pt(q=tstat,df = reg1$df.residual,lower.tail = FALSE)
pvalue
## [1] 0.01504883
# Linear hypothesis tests with car::linearHypothesis: coefficient equality and
# joint restrictions.
rm(list=ls()) # clear workspace
cat("\014") # clear console
library(PortfolioAnalytics)
data("EuStockMarkets")
Prices<-as.data.frame(EuStockMarkets)
nobs <- nrow(Prices) # was `T` (shadows TRUE)
Returns<-log(Prices[2:nobs,]/Prices[1:(nobs-1),]) # daily log returns; (nobs-1) parenthesized
reg1<-lm(DAX~CAC+FTSE,Returns) #Run Regression
print(summary(reg1))
##
## Call:
## lm(formula = DAX ~ CAC + FTSE, data = Returns)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.046142 -0.003848 -0.000009 0.003987 0.033370
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.0002694 0.0001542 1.747 0.0807 .
## CAC 0.5152838 0.0183380 28.099 <2e-16 ***
## FTSE 0.3644971 0.0254199 14.339 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.006637 on 1856 degrees of freedom
## Multiple R-squared: 0.5853, Adjusted R-squared: 0.5849
## F-statistic: 1310 on 2 and 1856 DF, p-value: < 2.2e-16
library(car)
# Test if the coefficients are equal (H0: beta_CAC = beta_FTSE)
linearHypothesis(reg1,c("CAC=FTSE"))
## Linear hypothesis test
##
## Hypothesis:
## CAC - FTSE = 0
##
## Model 1: restricted model
## Model 2: DAX ~ CAC + FTSE
##
## Res.Df RSS Df Sum of Sq F Pr(>F)
## 1 1857 0.082383
## 2 1856 0.081752 1 0.00063101 14.326 0.0001586 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Test if the coefficients take specific values jointly
linearHypothesis(reg1,c("CAC=0","FTSE=.36"))
## Linear hypothesis test
##
## Hypothesis:
## CAC = 0
## FTSE = 0.36
##
## Model 1: restricted model
## Model 2: DAX ~ CAC + FTSE
##
## Res.Df RSS Df Sum of Sq F Pr(>F)
## 1 1858 0.142273
## 2 1856 0.081752 2 0.060521 687 < 2.2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# F-test of nested models via anova(): restricted (CAC only) vs unrestricted
# (all other indices; `DAX~.` uses every remaining column as a regressor).
rm(list=ls()) # clear workspace
cat("\014") # clear console
library(PortfolioAnalytics)
data("EuStockMarkets")
Prices<-as.data.frame(EuStockMarkets)
nobs <- nrow(Prices) # was `T` (shadows TRUE)
Returns<-log(Prices[2:nobs,]/Prices[1:(nobs-1),]) # daily log returns; (nobs-1) parenthesized
reg1<-lm(DAX~CAC,Returns) #Restricted
reg2<-lm(DAX~.,Returns) #Unrestricted
anova(reg1,reg2)
## Analysis of Variance Table
##
## Model 1: DAX ~ CAC
## Model 2: DAX ~ SMI + CAC + FTSE
## Res.Df RSS Df Sum of Sq F Pr(>F)
## 1 1857 0.090808
## 2 1855 0.067909 2 0.0229 312.76 < 2.2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Check multicollinearity with variance inflation factors (car::vif).
rm(list=ls()) # clear workspace
cat("\014") # clear console
library(PortfolioAnalytics)
data("EuStockMarkets")
Prices<-as.data.frame(EuStockMarkets)
nobs <- nrow(Prices) # was `T` (shadows TRUE)
Returns<-log(Prices[2:nobs,]/Prices[1:(nobs-1),]) # daily log returns; (nobs-1) parenthesized
reg1<-lm(DAX~.,Returns) #Run Regression for all assets ("." gives all assets not DAX)
library(car)
vif(reg1)
## SMI CAC FTSE
## 1.781686 2.023629 1.908167
#VIF>5 indicates multicollinearity
# Breusch-Pagan test for heteroscedasticity (lmtest::bptest).
rm(list=ls()) # clear workspace
cat("\014") # clear console
library(PortfolioAnalytics)
data("EuStockMarkets")
Prices<-as.data.frame(EuStockMarkets)
nobs <- nrow(Prices) # was `T` (shadows TRUE)
Returns<-log(Prices[2:nobs,]/Prices[1:(nobs-1),]) # daily log returns; (nobs-1) parenthesized
reg1<-lm(DAX~.,Returns) #Run Regression for all assets ("." gives all assets not DAX)
library(lmtest)
#H0:Homoscedasticity (i.e. var(residuals)=constant)
bptest(reg1)
##
## studentized Breusch-Pagan test
##
## data: reg1
## BP = 7.3734, df = 3, p-value = 0.0609
# Linear hypothesis test on CAC, with and without heteroscedasticity-robust
# (White-adjusted) standard errors.
rm(list=ls()) # clear workspace
cat("\014") # clear console
library(PortfolioAnalytics)
data("EuStockMarkets")
Prices<-as.data.frame(EuStockMarkets)
nobs <- nrow(Prices) # was `T` (shadows TRUE)
Returns<-log(Prices[2:nobs,]/Prices[1:(nobs-1),]) # daily log returns; (nobs-1) parenthesized
reg1<-lm(DAX~CAC+FTSE,Returns) #Run Regression
print(summary(reg1))
##
## Call:
## lm(formula = DAX ~ CAC + FTSE, data = Returns)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.046142 -0.003848 -0.000009 0.003987 0.033370
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.0002694 0.0001542 1.747 0.0807 .
## CAC 0.5152838 0.0183380 28.099 <2e-16 ***
## FTSE 0.3644971 0.0254199 14.339 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.006637 on 1856 degrees of freedom
## Multiple R-squared: 0.5853, Adjusted R-squared: 0.5849
## F-statistic: 1310 on 2 and 1856 DF, p-value: < 2.2e-16
library(car)
# Examine CAC
linearHypothesis(reg1,c("CAC=0"))
## Linear hypothesis test
##
## Hypothesis:
## CAC = 0
##
## Model 1: restricted model
## Model 2: DAX ~ CAC + FTSE
##
## Res.Df RSS Df Sum of Sq F Pr(>F)
## 1 1857 0.116530
## 2 1856 0.081752 1 0.034778 789.56 < 2.2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Examine CAC with hetero robust std errors (note the smaller F below)
linearHypothesis(reg1,c("CAC=0"),white.adjust = TRUE)
## Linear hypothesis test
##
## Hypothesis:
## CAC = 0
##
## Model 1: restricted model
## Model 2: DAX ~ CAC + FTSE
##
## Note: Coefficient covariance matrix supplied.
##
## Res.Df Df F Pr(>F)
## 1 1857
## 2 1856 1 274.82 < 2.2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Re-test coefficients with White (HC0) heteroscedasticity-robust standard
# errors via lmtest::coeftest + sandwich::vcovHC.
rm(list=ls()) # clear workspace
cat("\014") # clear console
library(PortfolioAnalytics)
data("EuStockMarkets")
Prices<-as.data.frame(EuStockMarkets)
nobs <- nrow(Prices) # was `T` (shadows TRUE)
Returns<-log(Prices[2:nobs,]/Prices[1:(nobs-1),]) # daily log returns; (nobs-1) parenthesized
reg1<-lm(DAX~CAC+FTSE,Returns) #Run Regression
print(summary(reg1))
##
## Call:
## lm(formula = DAX ~ CAC + FTSE, data = Returns)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.046142 -0.003848 -0.000009 0.003987 0.033370
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.0002694 0.0001542 1.747 0.0807 .
## CAC 0.5152838 0.0183380 28.099 <2e-16 ***
## FTSE 0.3644971 0.0254199 14.339 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.006637 on 1856 degrees of freedom
## Multiple R-squared: 0.5853, Adjusted R-squared: 0.5849
## F-statistic: 1310 on 2 and 1856 DF, p-value: < 2.2e-16
library("lmtest")
library("sandwich")
# Same point estimates, robust standard errors (compare to summary above).
coeftest(reg1, vcov = vcovHC(reg1, type = "HC0"))
##
## t test of coefficients:
##
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.00026938 0.00015441 1.7446 0.08122 .
## CAC 0.51528381 0.03061855 16.8291 < 2e-16 ***
## FTSE 0.36449714 0.03611028 10.0940 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Nested-model comparison two ways: classical anova() F-test and a Wald test
# with HC0 robust covariance (lmtest::waldtest + sandwich::vcovHC).
rm(list=ls()) # clear workspace
cat("\014") # clear console
library(PortfolioAnalytics)
data("EuStockMarkets")
Prices<-as.data.frame(EuStockMarkets)
nobs <- nrow(Prices) # was `T` (shadows TRUE)
Returns<-log(Prices[2:nobs,]/Prices[1:(nobs-1),]) # daily log returns; (nobs-1) parenthesized
reg1<-lm(DAX~CAC,Returns) #Restricted
reg2<-lm(DAX~.,Returns) #Unrestricted
anova(reg1,reg2)
## Analysis of Variance Table
##
## Model 1: DAX ~ CAC
## Model 2: DAX ~ SMI + CAC + FTSE
## Res.Df RSS Df Sum of Sq F Pr(>F)
## 1 1857 0.090808
## 2 1855 0.067909 2 0.0229 312.76 < 2.2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
library(lmtest)
library(sandwich)
waldtest(reg1, reg2, vcov = vcovHC(reg2, type = "HC0"))
## Wald test
##
## Model 1: DAX ~ CAC
## Model 2: DAX ~ SMI + CAC + FTSE
## Res.Df Df F Pr(>F)
## 1 1857
## 2 1855 2 170.3 < 2.2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Durbin-Watson test for first-order serial correlation in the residuals.
rm(list=ls()) # clear workspace
cat("\014") # clear console
library(PortfolioAnalytics)
data("EuStockMarkets")
Prices<-as.data.frame(EuStockMarkets)
nobs <- nrow(Prices) # was `T` (shadows TRUE)
Returns<-log(Prices[2:nobs,]/Prices[1:(nobs-1),]) # daily log returns; (nobs-1) parenthesized
reg1<-lm(DAX~CAC+FTSE,Returns) #Run Regression
library(lmtest)
# H0: no first-order autocorrelation (DW close to 2 supports H0)
dwtest(reg1) #Durbin Watson
##
## Durbin-Watson test
##
## data: reg1
## DW = 1.9577, p-value = 0.1799
## alternative hypothesis: true autocorrelation is greater than 0
# Breusch-Godfrey test for higher-order serial correlation (up to lag 3).
rm(list=ls()) # clear workspace
cat("\014") # clear console
library(PortfolioAnalytics)
data("EuStockMarkets")
Prices<-as.data.frame(EuStockMarkets)
nobs <- nrow(Prices) # was `T` (shadows TRUE)
Returns<-log(Prices[2:nobs,]/Prices[1:(nobs-1),]) # daily log returns; (nobs-1) parenthesized
reg1<-lm(DAX~CAC+FTSE,Returns) #Run Regression
library(lmtest)
#perform Breusch-Godfrey test
# H0: no serial correlation up to indicated order
bgtest(reg1, order=3)
##
## Breusch-Godfrey test for serial correlation of order up to 3
##
## data: reg1
## LM test = 0.80293, df = 3, p-value = 0.8488
# Ljung-Box test on the regression residuals (up to 5 lags).
rm(list=ls()) # clear workspace
cat("\014") # clear console
library(PortfolioAnalytics)
data("EuStockMarkets")
Prices<-as.data.frame(EuStockMarkets)
nobs <- nrow(Prices) # was `T` (shadows TRUE)
Returns<-log(Prices[2:nobs,]/Prices[1:(nobs-1),]) # daily log returns; (nobs-1) parenthesized
reg1<-lm(DAX~CAC+FTSE,Returns) #Run Regression
library(lmtest)
# H0: errors are independent; no serial correlation
Box.test(reg1$residuals, lag=5,type="Ljung")
##
## Box-Ljung test
##
## data: reg1$residuals
## X-squared = 0.9576, df = 5, p-value = 0.9659
# Newey-West HAC standard errors: recompute the CAC standard error and t-stat
# with an autocorrelation- and heteroscedasticity-robust covariance matrix.
rm(list=ls()) # clear workspace
cat("\014") # clear console
library(PortfolioAnalytics)
data("EuStockMarkets")
Prices<-as.data.frame(EuStockMarkets)
nobs <- nrow(Prices) # was `T` (shadows TRUE)
Returns<-log(Prices[2:nobs,]/Prices[1:(nobs-1),]) # daily log returns; (nobs-1) parenthesized
reg1<-lm(DAX~CAC+FTSE,Returns) #Run Regression
reg1summary<-summary(reg1)
#Here, we will use newey west robust errors. The pre-whitening and adjust are set to FALSE, and TRUE respectively to ensure the proper formula and small sample adjustments are made.
#https://www.econometrics-with-r.org/15-4-hac-standard-errors.html
library(sandwich)
#NW_VCOV_msft <- NeweyWest(lm(as.numeric(msftXS)~as.numeric(spyXS)), prewhite = F, adjust = T)
# Spell out TRUE/FALSE (T/F are reassignable shorthands, not keywords).
NW_VCOV <- NeweyWest(reg1, prewhite = FALSE, adjust = TRUE)
#compute standard errors
hac_err_CAC=sqrt(diag(NW_VCOV))[2] # the [2] is to retrieve the second element, which corresponds to the "independent" variable.
#Compare the standard Errors
cbind(reg1summary$coefficients[2,2],hac_err_CAC)
## hac_err_CAC
## CAC 0.01833804 0.03779094
#Compute new t-stat
newCACt=(reg1$coefficients['CAC']-0)/hac_err_CAC
#Compare the t-stats
cbind(reg1summary$coefficients[2,3],newCACt)
## newCACt
## CAC 28.09917 13.63512
# CAPM for MSFT: regress the stock's monthly excess log return on the market
# (S&P 500) excess log return, using the 1-month T-bill yield as the risk-free rate.
rm(list=ls()) # clear workspace
cat("\014") # clear console
library(quantmod)
startd<-"2015-12-01"
endd<-"2020-12-31"
freq<-"monthly"
tickers_code <- c("MSFT","^GSPC","TB4WK") # GSPC=SP500; TB4WK=1mt Treasury Yield
# Equity data from Yahoo; each ticker becomes an xts object in the workspace.
getSymbols(tickers_code[1:2],from = startd, to =endd, periodicity = freq, src = 'yahoo')
## [1] "MSFT" "^GSPC"
# Treasury yield from FRED (no date filtering here; subset below).
getSymbols(tickers_code[3],src = 'FRED')
## [1] "TB4WK"
### Data processing
# Strip punctuation so "^GSPC" matches the object name "GSPC".
tickers = gsub("[[:punct:]]", "", tickers_code)
# Prices
Price = do.call(merge, lapply(tickers[1:2], function(x) Ad(get(x))))
names(Price) = lapply(tickers[1:2], function(x) paste(x,".Price",sep=""))
# Returns
{Return = do.call(merge,lapply(Price, function(x)
periodReturn(x,period='monthly',type='log')))}
names(Return) = lapply(tickers[1:2], function(x) paste(x,".Return",sep=""))
# Risk free rate
Rf = TB4WK["2016-01-01/2020-12-31"] # this is an annual rate by default
Rf = (Rf/100+1)^(1/12)-1 # convert to monthly date
Rf = log(1+Rf) # convert to log
names(Rf) = "Rf"
Asset = do.call(merge,list(Price,Return,Rf))### merge data together
Asset = na.omit(Asset)#clean NA's
# Excess returns = asset return minus risk-free rate for the same month.
Asset$GSPC.ExcessReturn=Asset$GSPC.Return-Asset$Rf
Asset$MSFT.ExcessReturn=Asset$MSFT.Return-Asset$Rf
# CAPM: intercept = alpha, slope on the market = beta.
CAPMModel=lm(MSFT.ExcessReturn~GSPC.ExcessReturn,data=Asset) #run CAPM
print(summary(CAPMModel))
##
## Call:
## lm(formula = MSFT.ExcessReturn ~ GSPC.ExcessReturn, data = Asset)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.120639 -0.023982 0.004723 0.021406 0.076649
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.016359 0.005011 3.265 0.00184 **
## GSPC.ExcessReturn 0.802098 0.111760 7.177 1.48e-09 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.03798 on 58 degrees of freedom
## Multiple R-squared: 0.4704, Adjusted R-squared: 0.4612
## F-statistic: 51.51 on 1 and 58 DF, p-value: 1.478e-09
# CAPM for several stocks at once: build a nested tibble of excess returns,
# unnest to long format, then fit one regression per ticker with plyr::dlply.
rm(list=ls()) # clear workspace
cat("\014") # clear console
library(quantmod)
startd<-"2015-12-01"
endd<-"2020-12-31"
freq<-"monthly"
# NOTE(review): "FB" was Facebook's ticker at the time; it has since changed
# to META -- this pull may fail on current Yahoo data. Verify before re-running.
tickers_code <- c("IBM","AAPL","GOOG","FB","MSFT","^GSPC","TB4WK") # GSPC=SP500; TB4WK=1mt Treasury Yield
getSymbols(tickers_code[1:6],from = startd, to =endd, periodicity = freq, src = 'yahoo')
## [1] "IBM" "AAPL" "GOOG" "FB" "MSFT" "^GSPC"
getSymbols(tickers_code[7],src = 'FRED')
## [1] "TB4WK"
library(tidyverse)
# risk free rate
# NOTE(review): the Rf window ends 2020-03-31 while prices run to 2020-12-31;
# the earlier single-stock section used 2020-12-31 -- confirm which is intended.
Rf = TB4WK["2016-01-01/2020-03-31"] # annual rate
Rf = (Rf/100+1)^(1/12)-1 # convert to month rate
Rf = log(1+Rf) # converting to log returns
names(Rf) = "Rf"
# market excess return
ExcessReturn.Market = data.frame(periodReturn(Ad(get("GSPC")),
period = "monthly",type='log')[-1,]-Rf)
# stocks' excess return
# Each tibble row holds one ticker plus list-columns of its excess-return
# series, the (replicated) market series, and the date index.
df <- tibble(Ticker = tickers_code[1:5],
ExcessReturn.Stock = do.call(c,lapply(Ticker, function(x)
data.frame(periodReturn(Ad(get(x)),type='log')[-1,]-Rf))),
ExcessReturn.Market = rep(ExcessReturn.Market,5),
#Date = index(Rf)
Date = do.call(c,lapply(Ticker, function(x) (list(index(Rf)))))
)
library(plyr)
# convert to long table
df_long = df %>% unnest(colnames(df))
#Break up df_long by Ticker, then fit the specified model to each piece and return a list
models <- dlply(df_long, "Ticker", function(x) lm(ExcessReturn.Stock~1+ExcessReturn.Market, data = x))
# Apply coef to each model and return a data frame
coefs=ldply(models, coef)
names(coefs) = c("Ticker","Intercept","Beta")
print(coefs)
## Ticker Intercept Beta
## 1 AAPL 0.013577109 1.1372826
## 2 FB 0.004428500 1.0303281
## 3 GOOG 0.003823518 0.9839200
## 4 IBM -0.006817774 1.4072021
## 5 MSFT 0.018216833 0.8067236
# Rolling CAPM beta for IBM: fit 24-month rolling regressions of the stock's
# excess return on the market's, then plot the beta path over time.
rm(list=ls()) # clear workspace
cat("\014") # clear console
library(quantmod)
startd<-"2015-12-01"
endd<-"2020-12-31"
freq<-"monthly"
tickers_code <- c("IBM","^GSPC","TB4WK") # GSPC=SP500; TB4WK=1mt Treasury Yield
getSymbols(tickers_code[1:2],from = startd, to =endd, periodicity = freq, src = 'yahoo')
## [1] "IBM" "^GSPC"
getSymbols(tickers_code[3],src = 'FRED')
## [1] "TB4WK"
library(tidyverse)
# risk free rate
Rf = TB4WK["2016-01-01/2020-03-31"] # annual rate
Rf = (Rf/100+1)^(1/12)-1 # convert to month rate
Rf = log(1+Rf) # converting to log returns
names(Rf) = "Rf"
# market excess return
ExcessReturn.Market = data.frame(periodReturn(Ad(get("GSPC")),
period = "monthly",type='log')[-1,]-Rf)
# stocks' excess return
# Single-ticker version of the nested-tibble construction used earlier.
df <- tibble(Ticker = tickers_code[1],
ExcessReturn.Stock = do.call(c,lapply(Ticker, function(x)
data.frame(periodReturn(Ad(get(x)),type='log')[-1,]-Rf))),
ExcessReturn.Market = rep(ExcessReturn.Market,1),
#Date = index(Rf)
Date = do.call(c,lapply(Ticker, function(x) (list(index(Rf)))))
)
library(plyr)
# convert to long table
df_long = df %>% unnest(colnames(df))
#Break up df_long by Ticker, then fit the specified model to each piece and return a list
models <- dlply(df_long, "Ticker", function(x) lm(ExcessReturn.Stock~1+ExcessReturn.Market, data = x))
# Apply coef to each model and return a data frame
coefs=ldply(models, coef)
names(coefs) = c("Ticker","Intercept","Beta")
library(rollRegres)
# Rolling window of 24 monthly observations (2 years); the first 23 rows of
# each ticker's output are NA by construction.
rollmodels <- dlply(df_long, "Ticker", function(x) roll_regres(ExcessReturn.Stock~1+ExcessReturn.Market,
x,width = 24L,
do_compute = c("sigmas", "r.squareds")))
# rolling coefficients
rollcoefs=ldply(rollmodels, function(x) x$coefs)
# NOTE(review): this assumes ldply's row order matches index(Rf) exactly for
# each ticker -- verify the alignment if tickers or windows change.
rollcoefs$Date = rep(index(Rf),1)
rollcoefs = na.omit(rollcoefs) # drops the incomplete-window rows
rollcoefs=rollcoefs[order(rollcoefs$Date,rollcoefs$Ticker),]
row.names(rollcoefs) =NULL
names(rollcoefs) = c("Ticker","Alpha","Beta","Date")
head(rollcoefs,10)
## Ticker Alpha Beta Date
## 1 IBM -0.012153540 1.804071 2017-12-01
## 2 IBM -0.012946989 1.776356 2018-01-01
## 3 IBM -0.013550875 1.716319 2018-02-01
## 4 IBM -0.008525027 1.187984 2018-03-01
## 5 IBM -0.009532404 1.207887 2018-04-01
## 6 IBM -0.012440405 1.145113 2018-05-01
## 7 IBM -0.012534233 1.148245 2018-06-01
## 8 IBM -0.012868609 1.097941 2018-07-01
## 9 IBM -0.013262752 1.087013 2018-08-01
## 10 IBM -0.011910720 1.071676 2018-09-01
# One style entry per ticker (single ticker here, hence length-1 vectors).
colors = c("darkred")
linestypes = c("solid")
shapes = c(15)
# Plot the rolling beta path, one point+line series per ticker.
ggplot(data = rollcoefs,
aes(x=Date,y=Beta,group = Ticker,color = Ticker, lty = Ticker,shape = Ticker))+
geom_point()+
geom_line()+
scale_x_date(date_breaks = "6 months" , date_labels = "%Y-%m")+
scale_color_manual(values = alpha(colors,0.5)) +
scale_linetype_manual(values = linestypes)+
scale_shape_manual(values=shapes)+
labs(x="Date", y="",title = "Rolling Beta (estimated window = 2 year)")
rm(list=ls()) # clear workspace
cat("\014") # clear console
library(quantmod)
startd<-"2015-12-01"
endd<-"2020-12-31"
freq<-"monthly"
tickers_code <- c("IBM","F","GE","CAT","MSFT","^GSPC","TB4WK") # GSPC=SP500; TB4WK=1mt Treasury Yield
getSymbols(tickers_code[1:6],from = startd, to =endd, periodicity = freq, src = 'yahoo')
## [1] "IBM" "F" "GE" "CAT" "MSFT" "^GSPC"
getSymbols(tickers_code[7],src = 'FRED')
## [1] "TB4WK"
library(tidyverse)
# risk free rate
Rf = TB4WK["2016-01-01/2020-03-31"] # annual rate
Rf = (Rf/100+1)^(1/12)-1 # convert to month rate
Rf = log(1+Rf) # converting to log returns
names(Rf) = "Rf"
# market excess return
ExcessReturn.Market = data.frame(periodReturn(Ad(get("GSPC")),
period = "monthly",type='log')[-1,]-Rf)
# stocks' excess return
df <- tibble(Ticker = tickers_code[1:5],
ExcessReturn.Stock = do.call(c,lapply(Ticker, function(x)
data.frame(periodReturn(Ad(get(x)),type='log')[-1,]-Rf))),
ExcessReturn.Market = rep(ExcessReturn.Market,5),
#Date = index(Rf)
Date = do.call(c,lapply(Ticker, function(x) (list(index(Rf)))))
)
# plyr supplies dlply/ldply/ddply for split-apply-combine.
# NOTE(review): attaching plyr after dplyr masks several dplyr verbs
# (e.g. summarise); the plyr versions are the ones intended here.
library(plyr)
# convert to long table
df_long = df %>% unnest(colnames(df))
#Break up df_long by Ticker, then fit the specified model to each piece and return a list
models <- dlply(df_long, "Ticker", function(x) lm(ExcessReturn.Stock~1+ExcessReturn.Market, data = x))
# Apply coef to each model and return a data frame
coefs=ldply(models, coef)
names(coefs) = c("Ticker","Intercept","Beta")
### expected Rf and market return
Rf_avg = mean(Rf)
# Average market excess return plus Rf gives the average total market return
Mkt_avg = mean(ExcessReturn.Market$monthly.returns)+Rf_avg
### require return from CAPM
# CAPM: E[R_i] = Rf + beta_i * (E[R_m] - Rf)
coefs$RequireReturn = Rf_avg + coefs$Beta*(Mkt_avg-Rf_avg)
# Realized average total return per ticker (mean excess return + Rf)
coefs$ExpectedReturn = ddply(df_long,"Ticker",summarise,
Mean = mean(ExcessReturn.Stock,na.rm = TRUE))$Mean+Rf_avg
head(coefs)
## Ticker Intercept Beta RequireReturn ExpectedReturn
## 1 CAT 0.007489770 1.2497801 0.005495941 0.0129857105
## 2 F -0.022175324 1.5373042 0.006525288 -0.0156500368
## 3 GE -0.029873270 1.3534185 0.005866970 -0.0240063002
## 4 IBM -0.006817773 1.4072019 0.006059516 -0.0007582564
## 5 MSFT 0.018216833 0.8067241 0.003909783 0.0221266164
# Security Market Line plot: the SML (grey), each stock's realized mean return
# (squares) vs its CAPM-required return (circles), plus the risk-free asset
# and market-portfolio reference points.
# NOTE(review): ggplot2 >= 3.4 prefers `linewidth` over `size` for line
# geoms; `size` in geom_abline() will emit a deprecation warning there.
colors = c("darkred","steelblue","darkgreen","yellow4","darkblue")
linestypes = c("solid","longdash","twodash","dashed","dotdash")
shapes = c(15:19)
ggplot(coefs,aes(x = Beta,color = Ticker))+
# SML: intercept = Rf, slope = market risk premium
geom_abline(intercept = Rf_avg,slope = Mkt_avg-Rf_avg,color="grey",size = 2, alpha =0.6)+
geom_point(aes(y=ExpectedReturn,color = Ticker),size = 3,shape=15)+
geom_point(aes(y=RequireReturn,color = Ticker),size = 3)+
# Reference points: risk-free asset (beta = 0) and market portfolio (beta = 1)
geom_point(aes(x=0,y=Rf_avg),color = "darkgreen",size=5,shape = 2)+
geom_point(aes(x=1,y=Mkt_avg),color = "darkgreen",size=5,shape = 2)+
annotate(geom="text", x=1.08, y=Mkt_avg-0.001, label="M(Market portfolio)",
color="darkgreen")+
annotate(geom="text", x=0.1, y=Rf_avg, label="Risk-free Rate",
color="darkgreen")+
geom_segment(aes(x = 1, y = 0, xend = 1, yend = Mkt_avg),linetype="dashed")+
geom_segment(aes(x = 0, y = Mkt_avg, xend = 1, yend = Mkt_avg),linetype="dashed")+
scale_color_manual(values = alpha(colors,0.7)) +
scale_fill_manual(values = alpha(colors,0.7))+
scale_y_continuous(labels = scales::percent)+
scale_x_continuous(expand = c(0.0001, 0, 0.1, 0.1))+
labs(x="Beta", y="Return",title = "Security Market Line")+
theme(panel.border = element_blank())
### Housekeeping
rm(list=ls()) # clear workspace
cat("\014") # clear console
# prepare library
library(quantmod)
library(tidyverse)
library(readr)
library(car)
library(lmtest)
library(AER)
library(lubridate)
library(xts)
### Download Dow Constituents price data from Yahoo Finance
# set parameters
# Daily prices, Dec 2015 through Dec 2020
start_date<-"2015-12-01"
end_date<-"2020-12-31"
freq<-"daily"
# NOTE(review): 29 tickers are listed -- confirm whether one Dow constituent
# is intentionally omitted from the sample.
tickers_code <- c("AXP","AMGN","AAPL","BA","CAT","CSCO","CVX","GS","HD","HON","IBM","INTC","JNJ","KO","JPM","MCD","MMM","MRK","MSFT","NKE","PG","TRV","UNH","CRM","VZ","V","WBA","WMT","DIS")
# pull stock price from yahoo
# Each symbol is assigned into the workspace as its own xts object
getSymbols(tickers_code,from=start_date,to=end_date,periodicity=freq,src='yahoo')
## [1] "AXP" "AMGN" "AAPL" "BA" "CAT" "CSCO" "CVX" "GS" "HD" "HON"
## [11] "IBM" "INTC" "JNJ" "KO" "JPM" "MCD" "MMM" "MRK" "MSFT" "NKE"
## [21] "PG" "TRV" "UNH" "CRM" "VZ" "V" "WBA" "WMT" "DIS"
### Load Factor Data
# download factor data from Ken French website via FTP
download.file(url = "https://mba.tuck.dartmouth.edu/pages/faculty/ken.french/ftp/F-F_Research_Data_Factors_CSV.zip", destfile = "F-F_Research_Data_Factors_CSV.zip",mode="wb")
# read the contents and extract the desired dates
# read_csv reads directly from the zip; skip = 3 drops the file's header text.
# The unnamed first column (YYYYMM dates) is auto-named "...1" by readr.
Factors <- read_csv("F-F_Research_Data_Factors_CSV.zip", skip = 3, col_names = T) %>%
na.omit()%>%
dplyr::rename("Date" = "...1") %>%
dplyr::mutate_all(as.numeric) %>%
# Date > 196301 also discards the annual-frequency rows appended at the
# bottom of the French file (4-digit years parse as values < 196301).
filter(Date > 196301)
# Format the Factor data frame
# Turn YYYYMM integers into first-of-month Date objects
FFdate<-as.Date(paste0(as.character(Factors$Date), '01'), format='%Y%m%d')
# let's only keep market factor and RF (since we only want to run CAPM later) and convert them to log return
# Percent returns -> decimal -> log gross return
FFdata<-log(select(Factors, -Date)/100+1)
# Window the factor series to the same sample as the price data
FFxts<-xts(FFdata,order.by=FFdate)[paste(start_date, "/", end_date, sep = "")]
# print the first several rows
head(FFxts)
## Mkt-RF SMB HML RF
## 2015-12-01 -0.0219389075 -0.028502360 -0.026241311 9.9995e-05
## 2016-01-01 -0.0594315838 -0.034487930 0.020390690 9.9995e-05
## 2016-02-01 -0.0007002451 0.008067371 -0.005716307 1.9998e-04
## 2016-03-01 0.0672847468 0.007571265 0.010939940 1.9998e-04
## 2016-04-01 0.0090588445 0.006677655 0.031595562 9.9995e-05
## 2016-05-01 0.0176434352 -0.001901807 -0.016739324 9.9995e-05
# Compute the excess return and join excess return with factor data
# use `Ad()` to get adjusted closed price
# Merge every ticker's adjusted close into one wide xts object
Price = do.call(merge, lapply(tickers_code, function(x) Ad(get(x))))
names(Price) = lapply(tickers_code, function(x) paste(x,".Price",sep=""))
# Extract the last days of each month
Price = Price[endpoints(Price, on='months')]
# Alter the date to match with other series
# Re-stamp each month-end observation to the first of its month so the
# index lines up with the first-of-month factor dates
Price = xts(x = Price, order.by = floor_date(index(Price), "month")) %>%
na.omit()
# Let's keep only the appropriate factor data (price data except the first line, since it will become NA after calculating returns)
FFxts<-FFxts[index(Price)[-1]]
# stocks' excess return
# One list element per ticker: monthly log return minus the risk-free rate
df <- tibble(Ticker = tickers_code,
ExcessReturn.Stock = do.call(c,lapply(tickers_code, function(x)
data.frame(periodReturn(Price[,paste(x,".Price",sep = "")], type='log')[-1,]-FFxts$RF))),
Date = do.call(c,lapply(Ticker, function(x) (list(index(FFxts)))))
)
# Tibble for Factor: factor values keyed by the (return-dated) price index.
# RF is dropped because excess returns already subtract the risk-free rate.
# FIX: as.tibble() is deprecated since tibble 2.0.0 -- use as_tibble(),
# which this file already uses elsewhere (see the first-pass regression).
FF_df <- tibble(Date = index(Price)[-1],
as_tibble(FFxts)) %>%
select(-RF)
# convert to long table and join with the factors
# unnest() expands the per-ticker list-columns into rows; inner_join()
# matches on the shared "Date" column (join key inferred automatically).
df_long = df %>%
unnest(colnames(df)) %>%
inner_join(FF_df)
head(df_long)
## # A tibble: 6 × 6
## Ticker ExcessReturn.Stock Date `Mkt-RF` SMB HML
## <chr> <dbl> <date> <dbl> <dbl> <dbl>
## 1 AXP -0.258 2016-01-01 -0.0594 -0.0345 0.0204
## 2 AXP 0.0379 2016-02-01 -0.000700 0.00807 -0.00572
## 3 AXP 0.0994 2016-03-01 0.0673 0.00757 0.0109
## 4 AXP 0.0683 2016-04-01 0.00906 0.00668 0.0316
## 5 AXP 0.00493 2016-05-01 0.0176 -0.00190 -0.0167
## 6 AXP -0.0743 2016-06-01 -0.000500 0.00588 -0.0146
### First-pass Regression
# Apply FF3 regression using market excess return, SMB and HML as independent variables
# Create the IDs for stocks since we cannot use Tickers directly in group_by
Stock_IDs <- tibble(Ticker = unique(df_long$Ticker),
IDs = 1:length(unique(df_long$Ticker)))
# apply the full sample regression to calculate the full sample betas for all stocks
FF3_step1 <- df_long %>%
# unite the Stock_IDs
inner_join(Stock_IDs) %>%
select(-c(Date, Ticker)) %>%
group_by(IDs) %>%
# do() stores one coefficient vector per stock in the list-column `model`
do(model = lm(ExcessReturn.Stock ~ `Mkt-RF` + SMB + HML, data = .) %>%
coefficients())
# Unwrap the results
FF3_step1_result <- do.call(rbind.data.frame, FF3_step1$model) %>%
as_tibble()
# Rename the variables
# names(Factors) = Date, Mkt-RF, SMB, HML, RF; dropping the last (RF) leaves
# four names for the four coefficient columns; the first is then "Alpha".
names(FF3_step1_result) = names(Factors)[-length(Factors)]
names(FF3_step1_result)[1] = "Alpha"
# Add in the Stock_IDs, then add in Ticker
FF3_step1_result <- Stock_IDs %>%
dplyr::inner_join(cbind(FF3_step1[, 1], FF3_step1_result)) %>%
select(-IDs)
head(FF3_step1_result)
## # A tibble: 6 × 5
## Ticker Alpha `Mkt-RF` SMB HML
## <chr> <dbl> <dbl> <dbl> <dbl>
## 1 AXP -0.00271 1.27 0.00820 0.331
## 2 AMGN -0.00689 0.794 -0.0243 -0.573
## 3 AAPL 0.00565 1.42 -0.476 -0.706
## 4 BA -0.00233 1.50 0.130 0.953
## 5 CAT 0.00865 0.824 0.436 0.115
## 6 CSCO -0.00167 0.922 -0.0523 -0.115
### Second-pass Regression
# Re-run the FF3 using betas as independent variables
# compute mean returns
Second_reg_data <- df_long %>%
# group by ticker then do calculation
dplyr::group_by(Ticker) %>%
# use summarise to calculate the mean excess return
dplyr::summarise(MeanEXRet = mean(ExcessReturn.Stock)) %>%
# join with beta estimates
dplyr::inner_join(FF3_step1_result) %>%
dplyr::select(-Alpha)
# run FF3 regression using betas as independent variables
# Cross-sectional regression: mean excess returns on the first-pass betas
# (a Fama-MacBeth-style second stage with one observation per stock)
FF3_step2 <- lm(MeanEXRet ~ `Mkt-RF` + SMB + HML, data = Second_reg_data)
summary(FF3_step2)
##
## Call:
## lm(formula = MeanEXRet ~ `Mkt-RF` + SMB + HML, data = Second_reg_data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.0166380 -0.0027617 -0.0003736 0.0035955 0.0109143
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.002267 0.004161 0.545 0.5907
## `Mkt-RF` 0.008546 0.004161 2.054 0.0506 .
## SMB -0.003547 0.003924 -0.904 0.3746
## HML -0.007220 0.002881 -2.507 0.0191 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.006452 on 25 degrees of freedom
## Multiple R-squared: 0.3173, Adjusted R-squared: 0.2354
## F-statistic: 3.874 on 3 and 25 DF, p-value: 0.02104
# --- Autocorrelation of the level of real GDP ---------------------------
rm(list=ls()) # clear workspace
cat("\014") # clear console

# Authenticate with Quandl using the key stored next to the script
library(Quandl)
Quandl_key <- read.csv("quandlkey.csv", stringsAsFactors = FALSE)$Key
Quandl.api_key(Quandl_key)

# Sample window: Jan 1990 through Jan 2018 (GDPC1 is quarterly)
startd <- "1990-01-01" #We want from Jan1990 forward.
endd   <- "2018-01-01"
freq   <- "quarterly"

# Pull real GDP as an xts object and plot its autocorrelation function
GDP <- Quandl("FRED/GDPC1", type = "xts", start_date = startd, end_date = endd)
acf(GDP)
# --- Partial autocorrelation of the level of real GDP -------------------
rm(list=ls()) # clear workspace
cat("\014") # clear console

# Authenticate with Quandl using the key stored next to the script
library(Quandl)
Quandl_key <- read.csv("quandlkey.csv", stringsAsFactors = FALSE)$Key
Quandl.api_key(Quandl_key)

# Sample window: Jan 1990 through Jan 2018 (GDPC1 is quarterly)
startd <- "1990-01-01" #We want from Jan1990 forward.
endd   <- "2018-01-01"
freq   <- "quarterly"

# Pull real GDP as an xts object and plot its partial autocorrelations
GDP <- Quandl("FRED/GDPC1", type = "xts", start_date = startd, end_date = endd)
pacf(GDP)
### KPSS trend-stationarity test on the level of real GDP
rm(list=ls()) # clear workspace
cat("\014") # clear console
library(Quandl)
Quandl_key = read.csv("quandlkey.csv",stringsAsFactors=FALSE)$Key
Quandl.api_key(Quandl_key)
startd<-"1990-01-01"
endd<-"2018-01-01"
freq<-"quarterly" # NOTE(review): defined but not passed to Quandl(); GDPC1 is already quarterly
GDP = Quandl("FRED/GDPC1", type="xts",start_date=startd, end_date=endd)
library(tseries)
# KPSS test for trend stationary
kpss.test(GDP, null="Trend")
##
## KPSS Test for Trend Stationarity
##
## data: GDP
## KPSS Trend = 0.37964, Truncation lag parameter = 4, p-value = 0.01
# H0: trend stationarity
# Interpret: Low p-value -->reject the null hypothesis of trend stationary
### ADF unit-root test on the level of real GDP
rm(list=ls()) # clear workspace
cat("\014") # clear console
library(Quandl)
Quandl_key = read.csv("quandlkey.csv",stringsAsFactors=FALSE)$Key
Quandl.api_key(Quandl_key)
startd<-"1990-01-01"
endd<-"2018-01-01"
freq<-"quarterly" # NOTE(review): defined but not passed to Quandl(); GDPC1 is already quarterly
GDP = Quandl("FRED/GDPC1", type="xts",start_date=startd, end_date=endd)
# ADF Test for unit root, which is a common type of non-stationary
library(tseries)
adf.test(GDP, alternative = "stationary")
##
## Augmented Dickey-Fuller Test
##
## data: GDP
## Dickey-Fuller = -1.9904, Lag order = 4, p-value = 0.5806
## alternative hypothesis: stationary
# H0: Not stationary
# Interpret: High p-value -->fail to reject the null hypothesis of non-stationary
### ADF unit-root test, this time on a named GDP column (setup for differencing below)
rm(list=ls()) # clear workspace
cat("\014") # clear console
library(Quandl)
Quandl_key = read.csv("quandlkey.csv",stringsAsFactors=FALSE)$Key
Quandl.api_key(Quandl_key)
startd<-"1990-01-01"
endd<-"2018-01-01"
freq<-"quarterly" # NOTE(review): defined but not passed to Quandl(); GDPC1 is already quarterly
GDP = Quandl("FRED/GDPC1", type="xts",start_date=startd, end_date=endd)
colnames(GDP)[1]="Level" # Add column name
# Use ADF test to check if it is non-stationary
library(tseries)
adf.test(GDP$Level)
##
## Augmented Dickey-Fuller Test
##
## data: GDP$Level
## Dickey-Fuller = -1.9904, Lag order = 4, p-value = 0.5806
## alternative hypothesis: stationary
# Interpret: High p-value -->fail to reject the null hypothesis of non-stationary. This series is non-stationary
In order to get this into a commonly used format, we will first take logs, noting that a first difference of logs approximates a percentage change.
# Log-transform, then first-difference: diff(log) is the quarterly log growth rate
GDP$LogLevel<- log(GDP$Level)
GDP$FirstDifLog<- diff(GDP$LogLevel, differences = 1)
GDP<-na.omit(GDP) # get rid of the NAs
# Now use the ADF test again to check if it is still non-stationary
adf.test(GDP$FirstDifLog)
##
## Augmented Dickey-Fuller Test
##
## data: GDP$FirstDifLog
## Dickey-Fuller = -4.2675, Lag order = 4, p-value = 0.01
## alternative hypothesis: stationary
# Interpret: Low p-value --> reject the null hypothesis of non-stationarity
# Our data appears to now be stationary due to differencing
### Cointegration check for SPY vs IVV (both track the S&P 500) --
### an Engle-Granger-style two-step procedure.
rm(list=ls()) # clear workspace
cat("\014") # clear console
library(quantmod)
startd <-"2013-01-01"
endd <-"2013-12-31"
SPY <- Ad(getSymbols("SPY", from = startd, to = endd, auto.assign = FALSE)) # get the adjusted closed
IVV <- Ad(getSymbols("IVV", from = startd, to = endd, auto.assign = FALSE)) # get the adjusted closed
### Step 1: Finding the integration order of each series
library(tseries)
kpss.test(SPY, null="Trend") # Low p-value --> reject H0 -->Non stationary.
##
## KPSS Test for Trend Stationarity
##
## data: SPY
## KPSS Trend = 0.20051, Truncation lag parameter = 5, p-value = 0.01581
kpss.test(IVV, null="Trend") # Low p-value --> reject H0 -->Non stationary.
##
## KPSS Test for Trend Stationarity
##
## data: IVV
## KPSS Trend = 0.19944, Truncation lag parameter = 5, p-value = 0.01621
# Take first difference
SPY_d1<-diff(SPY,differences = 1)
IVV_d1<-diff(IVV,differences = 1)
# ADF test for unit root
adf.test(na.omit(SPY_d1)) # low p-value --> reject H0 -->SPY_d1 is stationary --> SPY is I(1).
##
## Augmented Dickey-Fuller Test
##
## data: na.omit(SPY_d1)
## Dickey-Fuller = -5.742, Lag order = 6, p-value = 0.01
## alternative hypothesis: stationary
adf.test(na.omit(IVV_d1)) # low p-value --> reject H0 -->IVV_d1 is stationary --> IVV is I(1).
##
## Augmented Dickey-Fuller Test
##
## data: na.omit(IVV_d1)
## Dickey-Fuller = -5.711, Lag order = 6, p-value = 0.01
## alternative hypothesis: stationary
#Interpret: Series Y_t(IVV) and X_t(SPY) have the same integration order (1)
### Step 2: Estimate cointegration coefficient and get residuals
# The two series share the same dates, so lm() aligns them one-to-one
fit<-lm(IVV~SPY)
res<-fit$residuals
### Step 3: Do ADF test for unit root
# NOTE(review): standard ADF critical values are approximate here since the
# residuals come from an estimated regression (Engle-Granger caveat).
adf.test(res) # Low p-value --> reject H0--> Residuals are stationary.
##
## Augmented Dickey-Fuller Test
##
## data: res
## Dickey-Fuller = -6.0557, Lag order = 6, p-value = 0.01
## alternative hypothesis: stationary
#Interpret: Y_t(IVV) and X_t(SPY) are co-integrated.
### Fit AR(1), MA(1), and ARMA(1,1) models to GDP log growth by hand
rm(list=ls()) # clear workspace
cat("\014") # clear console
library(Quandl)
Quandl_key = read.csv("quandlkey.csv",stringsAsFactors=FALSE)$Key
Quandl.api_key(Quandl_key)
startd<-"1990-01-01"
endd<-"2018-01-01"
freq<-"quarterly" # NOTE(review): defined but not passed to Quandl(); GDPC1 is already quarterly
GDP <- Quandl("FRED/GDPC1", type="xts",start_date=startd, end_date=endd)
GDPGrowth <- diff(log(GDP)) # calculate log growth rate
GDPGrowth <- na.omit(GDPGrowth) # get rid of the NAs
# Specify the ARIMA model you with your choice or p, q values...(p=AR order, d=differencing, q= MA order)
AR1 <- arima(GDPGrowth, order = c(1, 0, 0)) # fit an AR(1) model
AR1 # print the model
##
## Call:
## arima(x = GDPGrowth, order = c(1, 0, 0))
##
## Coefficients:
## ar1 intercept
## 0.4050 6e-03
## s.e. 0.0858 9e-04
##
## sigma^2 estimated as 2.917e-05: log likelihood = 425.77, aic = -845.54
MA1 <- arima(GDPGrowth, order = c(0, 0, 1)) # fit an MA(1) model
MA1 # print the model
##
## Call:
## arima(x = GDPGrowth, order = c(0, 0, 1))
##
## Coefficients:
## ma1 intercept
## 0.2868 6e-03
## s.e. 0.0733 7e-04
##
## sigma^2 estimated as 3.097e-05: log likelihood = 422.46, aic = -838.91
AR1MA1 <- arima(GDPGrowth, order = c(1, 0, 1)) # fit an ARMA(1,1) model
AR1MA1 # print the model
##
## Call:
## arima(x = GDPGrowth, order = c(1, 0, 1))
##
## Coefficients:
## ar1 ma1 intercept
## 0.7001 -0.3553 0.0060
## s.e. 0.1368 0.1750 0.0011
##
## sigma^2 estimated as 2.828e-05: log likelihood = 427.46, aic = -846.92
We can use the auto.arima() function from the
forecast package to ask R to return the best ARIMA model
according to either AIC, AICc or BIC value.
### Automatic model selection with forecast::auto.arima()
rm(list=ls()) # clear workspace
cat("\014") # clear console
library(Quandl)
Quandl_key = read.csv("quandlkey.csv",stringsAsFactors=FALSE)$Key
Quandl.api_key(Quandl_key)
startd<-"1990-01-01"
endd<-"2018-01-01"
freq<-"quarterly" # NOTE(review): defined but not passed to Quandl(); GDPC1 is already quarterly
GDP <- Quandl("FRED/GDPC1", type="xts",start_date=startd, end_date=endd)
GDPGrowth <- diff(log(GDP)) # calculate log growth rate
GDPGrowth <- na.omit(GDPGrowth) # get rid of the NAs
library(forecast)
AR <- auto.arima(GDPGrowth, max.q = 0) # set the maximum value of MA order q=0 to auto fit an AR model
AR # print the model
## Series: GDPGrowth
## ARIMA(2,0,0) with non-zero mean
##
## Coefficients:
## ar1 ar2 mean
## 0.3292 0.1861 0.006
## s.e. 0.0921 0.0924 0.001
##
## sigma^2 = 2.89e-05: log likelihood = 427.76
## AIC=-847.52 AICc=-847.15 BIC=-836.65
MA <- auto.arima(GDPGrowth, max.p = 0) # set the maximum value of AR order p=0 to auto fit an MA model
MA # print the model
## Series: GDPGrowth
## ARIMA(0,0,3) with non-zero mean
##
## Coefficients:
## ma1 ma2 ma3 mean
## 0.3227 0.2925 0.1340 0.0061
## s.e. 0.0921 0.0957 0.0816 0.0009
##
## sigma^2 = 2.937e-05: log likelihood = 427.39
## AIC=-844.78 AICc=-844.21 BIC=-831.18
ARMA <- auto.arima(GDPGrowth) # fit an ARMA model(the fitted model might be AR, MA or ARIMA)
ARMA # print the model
## Series: GDPGrowth
## ARIMA(2,0,0) with non-zero mean
##
## Coefficients:
## ar1 ar2 mean
## 0.3292 0.1861 0.006
## s.e. 0.0921 0.0924 0.001
##
## sigma^2 = 2.89e-05: log likelihood = 427.76
## AIC=-847.52 AICc=-847.15 BIC=-836.65
A seasonal autoregressive integrated moving average (SARIMA) model extends the ARIMA model by adding seasonal terms that capture recurring seasonal patterns in the data.
### SARIMA order selection for housing starts: inspect ACF/PACF at successive
### levels of differencing to choose the regular and seasonal orders.
rm(list=ls()) # clear workspace
cat("\014") # clear console
library(Quandl)
Quandl_key = read.csv("quandlkey.csv",stringsAsFactors=FALSE)$Key
Quandl.api_key(Quandl_key)
startd<-"2000-01-01"
endd<-"2018-01-01"
ticker <- "FRED/HOUSTNSA" #New Privately-Owned Housing Units Started: Total Units
HSNG <- Quandl(ticker, type="ts",start_date=startd, end_date=endd)
# Braces group the plot and its ablines into one chunk so they draw together
{plot(HSNG)
abline(v = ts(c(2000,2005,2010,2015)),col = "red") # v is for vertical lines
abline(v = ts(c(2001,2002,2003,2004)), col = "blue", lty = 2)}
# Evidence of seasonality
### Decide the order of SARIMA
library(astsa)
acf2(HSNG, 48) # Interpret: The slow decay shown in the ACF is a sign that differencing may be required
## [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10] [,11] [,12] [,13]
## ACF 0.96 0.92 0.87 0.82 0.79 0.77 0.77 0.78 0.81 0.85 0.88 0.88 0.85
## PACF 0.96 -0.06 -0.12 0.06 0.15 0.15 0.13 0.21 0.26 0.22 0.08 -0.12 -0.32
## [,14] [,15] [,16] [,17] [,18] [,19] [,20] [,21] [,22] [,23] [,24] [,25]
## ACF 0.8 0.74 0.69 0.65 0.62 0.61 0.61 0.63 0.65 0.67 0.67 0.64
## PACF -0.4 -0.22 -0.05 0.14 0.00 0.05 -0.21 0.01 -0.02 -0.05 0.00 -0.03
## [,26] [,27] [,28] [,29] [,30] [,31] [,32] [,33] [,34] [,35] [,36] [,37]
## ACF 0.58 0.52 0.46 0.42 0.39 0.37 0.36 0.37 0.38 0.39 0.39 0.35
## PACF -0.02 -0.08 -0.07 0.08 -0.05 -0.06 -0.05 0.03 -0.10 0.00 0.07 -0.04
## [,38] [,39] [,40] [,41] [,42] [,43] [,44] [,45] [,46] [,47] [,48]
## ACF 0.30 0.24 0.19 0.15 0.12 0.10 0.10 0.10 0.12 0.13 0.12
## PACF 0.01 0.07 0.05 0.03 0.02 0.03 -0.04 0.02 0.09 0.01 0.01
acf2(diff(HSNG), 48) # Interpret: Even with the first order of differencing, we observe that there is still slow decay in the ACF plots at a seasonal lag period of 12. This thus suggest a seasonal difference to be applied.
## [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10] [,11] [,12]
## ACF 0.03 0.11 -0.07 -0.17 -0.18 -0.17 -0.21 -0.19 -0.09 0.08 0.28 0.41
## PACF 0.03 0.11 -0.08 -0.18 -0.17 -0.14 -0.21 -0.27 -0.22 -0.08 0.12 0.32
## [,13] [,14] [,15] [,16] [,17] [,18] [,19] [,20] [,21] [,22] [,23] [,24]
## ACF 0.35 0.06 -0.07 -0.22 -0.10 -0.22 -0.07 -0.31 -0.06 0.10 0.24 0.36
## PACF 0.40 0.20 0.03 -0.18 -0.03 -0.06 0.20 -0.02 0.01 0.04 -0.01 0.00
## [,25] [,26] [,27] [,28] [,29] [,30] [,31] [,32] [,33] [,34] [,35] [,36]
## ACF 0.28 0.12 -0.08 -0.22 -0.10 -0.14 -0.17 -0.20 -0.06 0.03 0.2 0.40
## PACF -0.01 0.05 0.04 -0.10 0.03 0.04 0.03 -0.07 0.08 -0.04 -0.1 0.01
## [,37] [,38] [,39] [,40] [,41] [,42] [,43] [,44] [,45] [,46] [,47] [,48]
## ACF 0.19 0.09 -0.08 -0.15 -0.15 -0.13 -0.12 -0.18 -0.12 0.09 0.17 0.35
## PACF -0.03 -0.09 -0.07 -0.05 -0.04 -0.04 0.02 -0.03 -0.09 -0.01 -0.02 0.04
# Regular + seasonal (lag-12) differencing applied before reading off orders
acf2(diff(diff(HSNG),12),48)
## [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10] [,11] [,12]
## ACF -0.53 0.06 0.02 0.08 -0.06 0.06 -0.12 0.09 -0.04 -0.02 0.24 -0.48
## PACF -0.53 -0.32 -0.17 0.05 0.07 0.13 -0.05 -0.05 -0.07 -0.10 0.33 -0.28
## [,13] [,14] [,15] [,16] [,17] [,18] [,19] [,20] [,21] [,22] [,23] [,24]
## ACF 0.34 -0.09 -0.01 -0.06 0.09 -0.11 0.19 -0.18 0.04 0.07 0.00 -0.06
## PACF -0.05 -0.08 -0.07 -0.01 0.02 0.00 0.10 0.02 -0.11 0.00 0.25 -0.18
## [,25] [,26] [,27] [,28] [,29] [,30] [,31] [,32] [,33] [,34] [,35] [,36]
## ACF 0.02 0.06 0.01 -0.05 0.02 0.07 -0.14 0.09 0.05 -0.11 0.00 0.08
## PACF 0.02 0.08 0.08 0.01 0.05 0.04 -0.03 -0.07 0.05 0.04 0.12 -0.14
## [,37] [,38] [,39] [,40] [,41] [,42] [,43] [,44] [,45] [,46] [,47] [,48]
## ACF -0.08 0.02 -0.06 0.04 -0.04 -0.02 0.03 0.00 -0.05 0.06 -0.06 0.03
## PACF -0.05 0.01 -0.08 -0.10 -0.06 -0.05 -0.05 -0.08 0.00 -0.01 -0.02 -0.11
# Interpret:
# Seasonal Order: From the seasonal lag perspective, we can see that the ACF cuts off at the 2nd seasonal lag, while the PACF appears to tail off. This would suggest a SARMA model of (0,2).
# ARMA Order: Within the first seasonal cycle, it can be seen that the ACF appears to be cutting off at lag = 1, while PACF appears to be cutting off at lag = 3.
# Thus a proposed model can be ARMA (3,1) x Seasonal (0,2)(lag = 12) for the differenced time series.
### Fit SARIMA Model
# ARIMA(3,1,1) x (0,1,2)[12], matching the orders chosen from the ACF/PACF above
arima(HSNG, order = c(3,1,1), seasonal = list(order = c(0,1,2), period = 12))
##
## Call:
## arima(x = HSNG, order = c(3, 1, 1), seasonal = list(order = c(0, 1, 2), period = 12))
##
## Coefficients:
## ar1 ar2 ar3 ma1 sma1 sma2
## 0.3734 0.3349 0.2163 -0.9246 -0.6783 -0.0807
## s.e. 0.0870 0.0733 0.0704 0.0493 0.0784 0.0782
##
## sigma^2 estimated as 74.76: log likelihood = -734.27, aic = 1482.55
library(forecast)
# astsa::sarima() fits the same model and additionally prints a t-table,
# diagnostics, and per-observation AIC/AICc/BIC
sarima(HSNG, p=3, d=1, q=1, P=0, D=1, Q=2, S=12) # sarima() takes in arguments in the following order: data, ARIMA inputs (p,d,q), SARIMA inputs (P,D,Q), and seasonal lag S
## initial value 2.564457
## iter 2 value 2.298184
## iter 3 value 2.195428
## iter 4 value 2.189850
## iter 5 value 2.183666
## iter 6 value 2.183389
## iter 7 value 2.183263
## iter 8 value 2.183249
## iter 9 value 2.183238
## iter 10 value 2.183220
## iter 11 value 2.183147
## iter 12 value 2.182860
## iter 13 value 2.182738
## iter 14 value 2.182723
## iter 15 value 2.181411
## iter 16 value 2.180273
## iter 17 value 2.180018
## iter 18 value 2.179797
## iter 19 value 2.179795
## iter 20 value 2.179794
## iter 21 value 2.179794
## iter 22 value 2.179794
## iter 23 value 2.179793
## iter 24 value 2.179793
## iter 24 value 2.179793
## iter 24 value 2.179793
## final value 2.179793
## converged
## initial value 2.184982
## iter 2 value 2.184962
## iter 3 value 2.184622
## iter 4 value 2.184601
## iter 5 value 2.184203
## iter 6 value 2.183855
## iter 7 value 2.183550
## iter 8 value 2.181614
## iter 9 value 2.180869
## iter 10 value 2.180682
## iter 11 value 2.180520
## iter 12 value 2.180464
## iter 13 value 2.180444
## iter 14 value 2.180441
## iter 15 value 2.180441
## iter 16 value 2.180440
## iter 17 value 2.180440
## iter 17 value 2.180440
## iter 17 value 2.180440
## final value 2.180440
## converged
## $fit
##
## Call:
## arima(x = xdata, order = c(p, d, q), seasonal = list(order = c(P, D, Q), period = S),
## include.mean = !no.constant, transform.pars = trans, fixed = fixed, optim.control = list(trace = trc,
## REPORT = 1, reltol = tol))
##
## Coefficients:
## ar1 ar2 ar3 ma1 sma1 sma2
## 0.3734 0.3349 0.2163 -0.9246 -0.6783 -0.0807
## s.e. 0.0870 0.0733 0.0704 0.0493 0.0784 0.0782
##
## sigma^2 estimated as 74.76: log likelihood = -734.27, aic = 1482.55
##
## $degrees_of_freedom
## [1] 198
##
## $ttable
## Estimate SE t.value p.value
## ar1 0.3734 0.0870 4.2919 0.0000
## ar2 0.3349 0.0733 4.5670 0.0000
## ar3 0.2163 0.0704 3.0738 0.0024
## ma1 -0.9246 0.0493 -18.7387 0.0000
## sma1 -0.6783 0.0784 -8.6496 0.0000
## sma2 -0.0807 0.0782 -1.0320 0.3034
##
## $AIC
## [1] 7.267385
##
## $AICc
## [1] 7.269475
##
## $BIC
## [1] 7.381242
### Forecast
# Forecast the next 20 months from the fitted SARIMA; returns point
# predictions ($pred) and their standard errors ($se), and plots them.
sarima.for(HSNG, n.ahead = 20, p=3, d=1, q=1, P=0, D=1, Q=2, S=12) # forecast prediction for next 20 time points
## $pred
## Jan Feb Mar Apr May Jun Jul
## 2018 89.94963 102.27485 114.10191 114.38431 119.99422 118.98789
## 2019 93.38826 95.01798 107.50834 118.91902 119.63144 124.58177 123.90903
## Aug Sep Oct Nov Dec
## 2018 110.90079 112.45802 113.53182 101.75552 91.62767
## 2019 115.99305 117.36446
##
## $se
## Jan Feb Mar Apr May Jun Jul
## 2018 8.646530 9.477341 10.713797 12.129696 13.232276 14.373052
## 2019 20.580318 22.526882 23.813414 25.176268 26.565959 27.872729 29.174661
## Aug Sep Oct Nov Dec
## 2018 15.479904 16.538307 17.580943 18.599896 19.598315
## 2019 30.455190 31.710785
We can use the auto.arima() function from the
forecast package and set seasonal = TRUE to
ask R to return the best ARIMA/SARIMA model according to either AIC,
AICc or BIC value.
### Automatic SARIMA selection for housing starts via auto.arima(seasonal = TRUE)
rm(list=ls()) # clear workspace
cat("\014") # clear console
library(Quandl)
Quandl_key = read.csv("quandlkey.csv",stringsAsFactors=FALSE)$Key
Quandl.api_key(Quandl_key)
startd<-"2000-01-01"
endd<-"2018-01-01"
ticker <- "FRED/HOUSTNSA" #New Privately-Owned Housing Units Started: Total Units
HSNG <- Quandl(ticker, type="ts",start_date=startd, end_date=endd)
# Braces group the plot and its ablines into one chunk so they draw together
{plot(HSNG)
abline(v = ts(c(2000,2005,2010,2015)),col = "red") # v is for vertical lines
abline(v = ts(c(2001,2002,2003,2004)), col = "blue", lty = 2)}
# Evidence of seasonality
### Fit SARIMA Model
library(forecast)
# seasonal = TRUE lets auto.arima search over seasonal orders as well;
# the ts object's frequency (12) supplies the seasonal period.
auto.arima(HSNG, seasonal = TRUE)
## Series: HSNG
## ARIMA(1,1,1)(0,1,2)[12]
##
## Coefficients:
## ar1 ma1 sma1 sma2
## -0.2223 -0.3329 -0.6617 -0.1001
## s.e. 0.1168 0.1079 0.0783 0.0768
##
## sigma^2 = 77.14: log likelihood = -735.63
## AIC=1481.27 AICc=1481.57 BIC=1497.86
# We have a model with ARIMA order (p=1,d=1,q=1), seasonal order (P=0,D=1,Q=2), and seasonal lag S=12
### Residual diagnostics (visual): ACF/PACF and time plot of model residuals
rm(list=ls()) # clear workspace
cat("\014") # clear console
library(Quandl)
Quandl_key = read.csv("quandlkey.csv",stringsAsFactors=FALSE)$Key
Quandl.api_key(Quandl_key)
startd<-"1990-01-01"
endd<-"2018-01-01"
freq<-"quarterly" # NOTE(review): defined but not passed to Quandl(); GDPC1 is already quarterly
GDP <- Quandl("FRED/GDPC1", type="xts",start_date=startd, end_date=endd)
GDPGrowth <- diff(log(GDP)) # calculate log growth rate
GDPGrowth <- na.omit(GDPGrowth) # get rid of the NAs
library(forecast)
fit <- auto.arima(GDPGrowth)
fit
## Series: GDPGrowth
## ARIMA(2,0,0) with non-zero mean
##
## Coefficients:
## ar1 ar2 mean
## 0.3292 0.1861 0.006
## s.e. 0.0921 0.0924 0.001
##
## sigma^2 = 2.89e-05: log likelihood = 427.76
## AIC=-847.52 AICc=-847.15 BIC=-836.65
### Ensure errors white noise.
# Ideally, residuals should look like white noise: serially uncorrelated
# with constant variance (Gaussian white noise would additionally be
# normally distributed -- normality is checked separately below).
# We will use tsdisplay to check the residuals for our optimal model.
tsdisplay(residuals(fit), lag.max=45, main='Model Residuals')
# Interpret: The ACF and PACF indicate little to no persistence. The time series plot shows little remaining patterns
# to the data.
### Residual diagnostics (formal): Ljung-Box test for residual autocorrelation
rm(list=ls()) # clear workspace
cat("\014") # clear console
library(Quandl)
Quandl_key = read.csv("quandlkey.csv",stringsAsFactors=FALSE)$Key
Quandl.api_key(Quandl_key)
startd<-"1990-01-01"
endd<-"2018-01-01"
freq<-"quarterly" # NOTE(review): defined but not passed to Quandl(); GDPC1 is already quarterly
GDP <- Quandl("FRED/GDPC1", type="xts",start_date=startd, end_date=endd)
GDPGrowth <- diff(log(GDP)) # calculate log growth rate
GDPGrowth <- na.omit(GDPGrowth) # get rid of the NAs
library(forecast)
fit <- auto.arima(GDPGrowth)
fit
## Series: GDPGrowth
## ARIMA(2,0,0) with non-zero mean
##
## Coefficients:
## ar1 ar2 mean
## 0.3292 0.1861 0.006
## s.e. 0.0921 0.0924 0.001
##
## sigma^2 = 2.89e-05: log likelihood = 427.76
## AIC=-847.52 AICc=-847.15 BIC=-836.65
### Compute the Ljung–Box test statistic for examining the null hypothesis of independence in a given time series.
Box.test(residuals(fit),lag=4,type="Ljung")
##
## Box-Ljung test
##
## data: residuals(fit)
## X-squared = 0.0055806, df = 4, p-value = 1
# Ho: No autocorrelation
#Interpret: Given the high p-value, we fail to reject the null of no persistence through 4 lags.
### Residual diagnostics: Kolmogorov-Smirnov test against a normal distribution
rm(list=ls()) # clear workspace
cat("\014") # clear console
library(Quandl)
Quandl_key = read.csv("quandlkey.csv",stringsAsFactors=FALSE)$Key
Quandl.api_key(Quandl_key)
startd<-"1990-01-01"
endd<-"2018-01-01"
freq<-"quarterly" # NOTE(review): defined but not passed to Quandl(); GDPC1 is already quarterly
GDP <- Quandl("FRED/GDPC1", type="xts",start_date=startd, end_date=endd)
GDPGrowth <- diff(log(GDP)) # calculate log growth rate
GDPGrowth <- na.omit(GDPGrowth) # get rid of the NAs
library(forecast)
fit <- auto.arima(GDPGrowth)
fit
## Series: GDPGrowth
## ARIMA(2,0,0) with non-zero mean
##
## Coefficients:
## ar1 ar2 mean
## 0.3292 0.1861 0.006
## s.e. 0.0921 0.0924 0.001
##
## sigma^2 = 2.89e-05: log likelihood = 427.76
## AIC=-847.52 AICc=-847.15 BIC=-836.65
### Let's also test the null of normal distribution via a KS test
# NOTE(review): ks.test with pnorm compares against a STANDARD normal
# N(0,1); the residuals are not standardized, so the rejection below may
# reflect scale rather than shape. Consider ks.test(scale(residuals(fit)),
# pnorm) to test shape alone.
ks.test(residuals(fit),pnorm)
##
## One-sample Kolmogorov-Smirnov test
##
## data: residuals(fit)
## D = 0.49566, p-value < 2.2e-16
## alternative hypothesis: two-sided
#Interpret: Given the low pvalue, we reject the null, implying that the residuals aren't normally distributed
# ---- Section: visual normality check of the ARIMA residuals via a Q-Q plot ----
rm(list=ls()) # clear workspace
cat("\014") # clear console
library(Quandl)
Quandl_key = read.csv("quandlkey.csv",stringsAsFactors=FALSE)$Key
Quandl.api_key(Quandl_key)
startd<-"1990-01-01"
endd<-"2018-01-01"
freq<-"quarterly"
# Pull quarterly real GDP (FRED/GDPC1) from Quandl as an xts object
GDP <- Quandl("FRED/GDPC1", type="xts",start_date=startd, end_date=endd)
GDPGrowth <- diff(log(GDP)) # calculate log growth rate
GDPGrowth <- na.omit(GDPGrowth) # get rid of the NAs
library(forecast)
fit <- auto.arima(GDPGrowth)
fit
## Series: GDPGrowth
## ARIMA(2,0,0) with non-zero mean
##
## Coefficients:
## ar1 ar2 mean
## 0.3292 0.1861 0.006
## s.e. 0.0921 0.0924 0.001
##
## sigma^2 = 2.89e-05: log likelihood = 427.76
## AIC=-847.52 AICc=-847.15 BIC=-836.65
### Let's see if the residuals look like a normal distribution with a qq plot
# Points close to the reference line suggest approximate normality; systematic
# curvature or departures in the tails indicate non-normal (e.g. fat-tailed) residuals.
qqnorm(residuals(fit));
qqline(residuals(fit))
# ---- Section: compare hand-specified AR(1) vs MA(3) fits to GDP growth by AIC ----
rm(list=ls()) # clear workspace
cat("\014") # clear console
library(Quandl)
Quandl_key = read.csv("quandlkey.csv",stringsAsFactors=FALSE)$Key
Quandl.api_key(Quandl_key)
startd<-"1990-01-01"
endd<-"2018-01-01"
freq<-"quarterly"
GDP <- Quandl("FRED/GDPC1", type="xts",start_date=startd, end_date=endd)
GDPGrowth <- diff(log(GDP)) # calculate log growth rate
GDPGrowth <- na.omit(GDPGrowth) # get rid of the NAs
library(forecast)
Model1<-Arima(GDPGrowth, order = c(1, 0, 0)) # fit an AR(1) model
Model1
## Series: GDPGrowth
## ARIMA(1,0,0) with non-zero mean
##
## Coefficients:
## ar1 mean
## 0.4050 6e-03
## s.e. 0.0858 9e-04
##
## sigma^2 = 2.97e-05: log likelihood = 425.77
## AIC=-845.54 AICc=-845.32 BIC=-837.39
Model2<-Arima(GDPGrowth, order = c(0, 0, 3)) # fit an MA(3) model
Model2
## Series: GDPGrowth
## ARIMA(0,0,3) with non-zero mean
##
## Coefficients:
## ma1 ma2 ma3 mean
## 0.3227 0.2925 0.1340 0.0061
## s.e. 0.0921 0.0957 0.0816 0.0009
##
## sigma^2 = 2.937e-05: log likelihood = 427.39
## AIC=-844.78 AICc=-844.21 BIC=-831.18
# Lower (more negative) AIC is preferred: Model1 (-845.54) beats Model2 (-844.78).
Notice that Model1’s AIC of -845.54 is smaller (more negative) than Model2’s AIC of -844.78; by the AIC criterion, Model1 (the AR(1)) is therefore the preferred model.
ARCH effect is concerned with a relationship within the heteroskedasticity, often termed serial correlation of the heteroskedasticity.
# ---- Section: pull weekly JPM prices and build simple & log returns for ARCH testing ----
rm(list=ls()) # clear workspace
cat("\014") # clear console
library(Quandl)
Quandl_key = read.csv("quandlkey.csv",stringsAsFactors=FALSE)$Key
Quandl.api_key(Quandl_key)
startd<-"2010-01-01" #We want from Jan2010 forward.
endd<-"2018-01-01"
freq<-"weekly"
trans<-"rdiff" # calculate simple return
ticker <- "WIKI/JPM.11" # we load the JPM closed price data (the 11th column)
JPM <- Quandl(ticker,transform=trans,start_date=startd, end_date=endd, collapse=freq,type="xts") # Careful here. The `rdiff` provides the simple return. We should convert to log returns since we are running a regression.
colnames(JPM )[1]="SimpleRet" # Add column name
# log return = log(1 + simple return)
JPM$LogRet<-log(1+JPM $SimpleRet)
The ARCH model assumes that the conditional mean of the error term in a time series model is constant (zero), but its conditional variance is not.
### Get Residual Square
# Demean the return series and square the residuals; persistence in the squared
# series is the signature of conditional heteroskedasticity (ARCH effects).
library(dplyr)
reg<-lm(JPM$LogRet~1) # demean the return series by regress it on constant only
JPM$DMLogRet<-resid(reg) # get residuals, which is the de-mean value of log return
JPM$Sq_DMLogRet<-JPM$DMLogRet^2 # let's compute the squared residuals (i.e. JPM$DMLogRet)
acf(JPM$Sq_DMLogRet) # use the ACF to see if there appears to be persistence in the squared residuals
### Engle's ARCH LM test
# Engle's ARCH LM test is the most commonly applied standard test to detect autoregressive conditional heteroscedasticity. We start with the regression of squared residuals upon lagged squared residuals
# lag.xts() (from the xts package) shifts the series by k=1 period.
ARCH <- lm(JPM$Sq_DMLogRet~lag.xts(JPM$Sq_DMLogRet,k=1))
summary(ARCH)
##
## Call:
## lm(formula = JPM$Sq_DMLogRet ~ lag.xts(JPM$Sq_DMLogRet, k = 1))
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.0031486 -0.0009597 -0.0007224 0.0001594 0.0143211
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.0009616 0.0001200 8.012 1.17e-14 ***
## lag.xts(JPM$Sq_DMLogRet, k = 1) 0.1558554 0.0486171 3.206 0.00145 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.002168 on 413 degrees of freedom
## (1 observation deleted due to missingness)
## Multiple R-squared: 0.02428, Adjusted R-squared: 0.02192
## F-statistic: 10.28 on 1 and 413 DF, p-value: 0.001452
# Calculate the LM test statistic
library(broom)
RSq_ARCH <- glance(ARCH)[[1]] # grab the R square value of ARCH model
L <- length(JPM$Sq_DMLogRet) # lengths for data
# NOTE(review): the regression drops 1 observation to the lag, so the usual T*R^2
# form would use the regression sample size rather than the full length L; the
# difference is negligible here — confirm the intended convention.
q <- length(coef(ARCH))-1 # degrees of freedom q
LM <- (L-q)*RSq_ARCH # Compute the LM stat as (L-q)*Rsquare
LM
## [1] 10.07604
# Calculate the critical value
alpha <- 0.05 # set significance levels
Critical <- qchisq(1-alpha, q) # get chi-squared stat(the arch test stat is a chi-squared)
Critical
## [1] 3.841459
# Interpret: Null hypothesis is white noise (i.e. no ARCH effects). Test stat is greater than critical value, so we reject, implying ARCH effects exist.
# ---- Section: fit a standard GARCH(1,1) to weekly JPM log returns (rugarch) ----
rm(list=ls()) # clear workspace
cat("\014") # clear console
library(Quandl)
Quandl_key = read.csv("quandlkey.csv",stringsAsFactors=FALSE)$Key
Quandl.api_key(Quandl_key)
startd<-"2010-01-01" #We want from Jan2010 forward.
endd<-"2018-01-01"
freq<-"weekly"
trans<-"rdiff" # calculate simple return
ticker <- "WIKI/JPM.11" # we load the JPM closed price data (the 11th column)
JPM <- Quandl(ticker,transform=trans,start_date=startd, end_date=endd, collapse=freq,type="xts")
colnames(JPM )[1]="SimpleRet" # Add column name
# Careful here. The `rdiff` provides the simple return. We should convert to log returns since we are running a regression.
JPM$LogRet<-log(1+JPM $SimpleRet)
### Fit GARCH(1,1) Model
library(rugarch)
# GARCH specification
# sGARCH(1,1) variance dynamics, constant (ARMA(0,0)) conditional mean, Gaussian errors.
garchSpec <- ugarchspec(variance.model=list(model="sGARCH",garchOrder=c(1,1)),
mean.model=list(armaOrder=c(0,0)),
distribution.model="norm")
# Estimate coefficients
garchFit <- ugarchfit(spec=garchSpec, data=JPM$LogRet)
garchFit
##
## *---------------------------------*
## * GARCH Model Fit *
## *---------------------------------*
##
## Conditional Variance Dynamics
## -----------------------------------
## GARCH Model : sGARCH(1,1)
## Mean Model : ARFIMA(0,0,0)
## Distribution : norm
##
## Optimal Parameters
## ------------------------------------
## Estimate Std. Error t value Pr(>|t|)
## mu 0.003194 0.001534 2.0819 0.037352
## omega 0.000066 0.000051 1.2876 0.197882
## alpha1 0.063833 0.033107 1.9281 0.053843
## beta1 0.873420 0.075063 11.6358 0.000000
##
## Robust Standard Errors:
## Estimate Std. Error t value Pr(>|t|)
## mu 0.003194 0.001503 2.12504 0.033583
## omega 0.000066 0.000075 0.88779 0.374655
## alpha1 0.063833 0.050937 1.25317 0.210144
## beta1 0.873420 0.113811 7.67428 0.000000
##
## LogLikelihood : 834.5133
##
## Information Criteria
## ------------------------------------
##
## Akaike -3.9929
## Bayes -3.9541
## Shibata -3.9930
## Hannan-Quinn -3.9775
##
## Weighted Ljung-Box Test on Standardized Residuals
## ------------------------------------
## statistic p-value
## Lag[1] 0.7315 0.3924
## Lag[2*(p+q)+(p+q)-1][2] 1.2946 0.4118
## Lag[4*(p+q)+(p+q)-1][5] 3.5249 0.3195
## d.o.f=0
## H0 : No serial correlation
##
## Weighted Ljung-Box Test on Standardized Squared Residuals
## ------------------------------------
## statistic p-value
## Lag[1] 0.2204 0.6387
## Lag[2*(p+q)+(p+q)-1][5] 1.3984 0.7649
## Lag[4*(p+q)+(p+q)-1][9] 2.3902 0.8537
## d.o.f=2
##
## Weighted ARCH LM Tests
## ------------------------------------
## Statistic Shape Scale P-Value
## ARCH Lag[3] 0.09997 0.500 2.000 0.7519
## ARCH Lag[5] 0.62060 1.440 1.667 0.8475
## ARCH Lag[7] 1.14186 2.315 1.543 0.8894
##
## Nyblom stability test
## ------------------------------------
## Joint Statistic: 0.9341
## Individual Statistics:
## mu 0.1905
## omega 0.1325
## alpha1 0.4935
## beta1 0.2145
##
## Asymptotic Critical Values (10% 5% 1%)
## Joint Statistic: 1.07 1.24 1.6
## Individual Statistic: 0.35 0.47 0.75
##
## Sign Bias Test
## ------------------------------------
## t-value prob sig
## Sign Bias 1.2238 0.22172
## Negative Sign Bias 1.2018 0.23014
## Positive Sign Bias 0.3963 0.69208
## Joint Effect 10.7865 0.01294 **
##
##
## Adjusted Pearson Goodness-of-Fit Test:
## ------------------------------------
## group statistic p-value(g-1)
## 1 20 21.98 0.2852
## 2 30 36.88 0.1492
## 3 40 41.88 0.3468
## 4 50 48.66 0.4867
##
##
## Elapsed time : 0.06580114
# Interpret: The alpha and beta (ARCH and GARCH terms) appear significant. The Ljung Box text of residuals appears to indicate no more persistence in the residuals nor squared residuals. The LM tests suggest no more ARCH effects.
Let’s try to fit another popular GARCH model; one with a threshold variable controlling the level of volatility (i.e. GJRGARCH). See here http://www.scienpress.com/upload/jsem/vol%202_3_6.pdf for a nice general listing of GARCH variants.
# ---- Section: fit a GJR-GARCH(1,1) (fGARCH submodel) to weekly JPM log returns ----
rm(list=ls()) # clear workspace
cat("\014") # clear console
library(Quandl)
Quandl_key = read.csv("quandlkey.csv",stringsAsFactors=FALSE)$Key
Quandl.api_key(Quandl_key)
startd<-"2010-01-01" #We want from Jan2010 forward.
endd<-"2018-01-01"
freq<-"weekly"
trans<-"rdiff" # calculate simple return
ticker <- "WIKI/JPM.11" # we load the JPM closed price data (the 11th column)
JPM <- Quandl(ticker,transform=trans,start_date=startd, end_date=endd, collapse=freq,type="xts")
colnames(JPM )[1]="SimpleRet" # Add column name
# Careful here. The `rdiff` provides the simple return. We should convert to log returns since we are running a regression.
JPM$LogRet<-log(1+JPM $SimpleRet)
### Fit GJR-GARCH(1,1) Model
library(rugarch)
# GJR-GARCH specification
# GJR-GARCH is specified via the fGARCH family with submodel="GJRGARCH";
# the extra eta11 coefficient below is the submodel's asymmetry/threshold
# parameter — NOTE(review): confirm exact parameterization in the rugarch docs.
garchSpecGJR <- ugarchspec(variance.model=list(model="fGARCH", garchOrder=c(1,1), submodel="GJRGARCH"),
mean.model=list(armaOrder=c(0,0)),
distribution.model="norm")
# Estimate coefficients
gjrgarchFit <- ugarchfit(spec=garchSpecGJR, data=JPM$LogRet)
gjrgarchFit
##
## *---------------------------------*
## * GARCH Model Fit *
## *---------------------------------*
##
## Conditional Variance Dynamics
## -----------------------------------
## GARCH Model : fGARCH(1,1)
## fGARCH Sub-Model : GJRGARCH
## Mean Model : ARFIMA(0,0,0)
## Distribution : norm
##
## Optimal Parameters
## ------------------------------------
## Estimate Std. Error t value Pr(>|t|)
## mu 0.002505 0.001494 1.6766 0.093622
## omega 0.000096 0.000069 1.3984 0.161978
## alpha1 0.042470 0.031481 1.3491 0.177313
## beta1 0.822647 0.101647 8.0932 0.000000
## eta11 0.999806 0.499220 2.0027 0.045206
##
## Robust Standard Errors:
## Estimate Std. Error t value Pr(>|t|)
## mu 0.002505 0.001563 1.60317 0.108898
## omega 0.000096 0.000140 0.68351 0.494284
## alpha1 0.042470 0.043749 0.97078 0.331660
## beta1 0.822647 0.207661 3.96149 0.000074
## eta11 0.999806 0.000411 2433.92991 0.000000
##
## LogLikelihood : 841.6567
##
## Information Criteria
## ------------------------------------
##
## Akaike -4.0224
## Bayes -3.9739
## Shibata -4.0227
## Hannan-Quinn -4.0032
##
## Weighted Ljung-Box Test on Standardized Residuals
## ------------------------------------
## statistic p-value
## Lag[1] 0.4094 0.5223
## Lag[2*(p+q)+(p+q)-1][2] 1.0424 0.4847
## Lag[4*(p+q)+(p+q)-1][5] 3.5594 0.3143
## d.o.f=0
## H0 : No serial correlation
##
## Weighted Ljung-Box Test on Standardized Squared Residuals
## ------------------------------------
## statistic p-value
## Lag[1] 0.08875 0.7658
## Lag[2*(p+q)+(p+q)-1][5] 2.24595 0.5615
## Lag[4*(p+q)+(p+q)-1][9] 3.61417 0.6548
## d.o.f=2
##
## Weighted ARCH LM Tests
## ------------------------------------
## Statistic Shape Scale P-Value
## ARCH Lag[3] 0.3012 0.500 2.000 0.5831
## ARCH Lag[5] 0.8767 1.440 1.667 0.7699
## ARCH Lag[7] 1.6946 2.315 1.543 0.7816
##
## Nyblom stability test
## ------------------------------------
## Joint Statistic: 0.9148
## Individual Statistics:
## mu 0.09678
## omega 0.08451
## alpha1 0.27392
## beta1 0.16032
## eta11 0.27395
##
## Asymptotic Critical Values (10% 5% 1%)
## Joint Statistic: 1.28 1.47 1.88
## Individual Statistic: 0.35 0.47 0.75
##
## Sign Bias Test
## ------------------------------------
## t-value prob sig
## Sign Bias 1.7138 0.08731 *
## Negative Sign Bias 0.1439 0.88561
## Positive Sign Bias 0.3059 0.75984
## Joint Effect 7.3349 0.06196 *
##
##
## Adjusted Pearson Goodness-of-Fit Test:
## ------------------------------------
## group statistic p-value(g-1)
## 1 20 21.02 0.3357
## 2 30 31.12 0.3600
## 3 40 39.77 0.4357
## 4 50 55.15 0.2533
##
##
## Elapsed time : 0.268625
The vector auto-regression (VAR) model extends the idea of uni-variate auto-regression to k time series regressions, where the lagged values of all k series appear as regressors. On the one hand, economic variables like GDP, investment and consumer spending all depend upon interest rates. On the other hand, the level of interest rates is also set, in part, by the prospects for economic growth and inflation. Hence, we need a VAR model.
# ---- Section: estimate a two-variable VAR (GDP growth, term spread) ----
rm(list=ls()) # clear workspace
cat("\014") # clear console
library(Quandl)
Quandl_key = read.csv("quandlkey.csv",stringsAsFactors=FALSE)$Key
Quandl.api_key(Quandl_key)
startd <-"1980-04-01"
endd <-"2012-12-31"
TB3MS <- Quandl("FRED/TB3MS",start_date=startd, end_date=endd,type="ts")
TB10YS <- Quandl("FRED/GS10",start_date=startd, end_date=endd,type="ts")
GDP <- Quandl("FRED/GDPC96",start_date=startd, end_date=endd,type="ts", transform="rdiff")# note this is simple return, we need log return since we want to run regression
# Term spread = long rate minus short rate (10yr minus 3mo), monthly
TSpread <- TB10YS - TB3MS
TSpread <- aggregate(TSpread,nfrequency=4,FUN=mean)# aggregate monthly data to quarterly(averages)
GDPGrowth = 400*log(1+GDP)# annual log growth rate%
# Visual inspection
plot(cbind(GDPGrowth, TSpread),xlab = "Date", main='RGDP growth and Term spread')
library(vars)
### Step 1: Model selection (use information criteria to decide upon the number of lags to include)
# ts.union() aligns the two quarterly series; na.omit() trims the unmatched ends.
VAR_Data<-na.omit(ts.union(GDPGrowth, TSpread)) #set up data for estimation
VARselect(VAR_Data, lag.max = 12, type = "const")$selection #obtain the best lag period
## AIC(n) HQ(n) SC(n) FPE(n)
## 2 2 2 2
# Interpret: All the information criteria suggest using lags = 2 --> we need to set p=2 when estimating the model
### Step 2: Estimate VAR model
VAR_fit<-VAR(y = VAR_Data, p = 2)
summary(VAR_fit)
##
## VAR Estimation Results:
## =========================
## Endogenous variables: GDPGrowth, TSpread
## Deterministic variables: const
## Sample size: 128
## Log Likelihood: -381.122
## Roots of the characteristic polynomial:
## 0.8208 0.8208 0.1958 0.1958
## Call:
## VAR(y = VAR_Data, p = 2)
##
##
## Estimation results for equation GDPGrowth:
## ==========================================
## GDPGrowth = GDPGrowth.l1 + TSpread.l1 + GDPGrowth.l2 + TSpread.l2 + const
##
## Estimate Std. Error t value Pr(>|t|)
## GDPGrowth.l1 0.29512 0.08356 3.532 0.000582 ***
## TSpread.l1 -0.87548 0.36949 -2.369 0.019373 *
## GDPGrowth.l2 0.21663 0.08173 2.651 0.009089 **
## TSpread.l2 1.29865 0.37148 3.496 0.000658 ***
## const 0.48692 0.47198 1.032 0.304256
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
##
## Residual standard error: 2.421 on 123 degrees of freedom
## Multiple R-Squared: 0.3111, Adjusted R-squared: 0.2887
## F-statistic: 13.88 on 4 and 123 DF, p-value: 2.246e-09
##
##
## Estimation results for equation TSpread:
## ========================================
## TSpread = GDPGrowth.l1 + TSpread.l1 + GDPGrowth.l2 + TSpread.l2 + const
##
## Estimate Std. Error t value Pr(>|t|)
## GDPGrowth.l1 0.009302 0.017077 0.545 0.586944
## TSpread.l1 1.057695 0.075515 14.006 < 2e-16 ***
## GDPGrowth.l2 -0.056374 0.016703 -3.375 0.000988 ***
## TSpread.l2 -0.218659 0.075920 -2.880 0.004691 **
## const 0.454647 0.096461 4.713 6.48e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
##
## Residual standard error: 0.4948 on 123 degrees of freedom
## Multiple R-Squared: 0.8304, Adjusted R-squared: 0.8249
## F-statistic: 150.6 on 4 and 123 DF, p-value: < 2.2e-16
##
##
##
## Covariance matrix of residuals:
## GDPGrowth TSpread
## GDPGrowth 5.86218 0.05959
## TSpread 0.05959 0.24486
##
## Correlation matrix of residuals:
## GDPGrowth TSpread
## GDPGrowth 1.00000 0.04974
## TSpread 0.04974 1.00000
Our estimated equations are:
\[\begin{align} GDPgrowth_t &= c_1 + \alpha_{11}GDPGrowth_{t-1} + \alpha_{12}TSpread_{t-1} + \alpha_{13}GDPGrowth_{t-2} + \alpha_{14}TSpread_{t-2} ~~~(1)\\ TSpread_t &= c_2 + \alpha_{21}GDPGrowth_{t-1} + \alpha_{22}TSpread_{t-1} + \alpha_{23}GDPGrowth_{t-2} + \alpha_{24}TSpread_{t-2} ~~~(2) \end{align}\]
In this first equation, the a11 coefficient = 0.295 implies that for every one unit increase in the last quarter GDP growth rate, GDP this quarter will rise by .295, holding constant the dynamic effects of prior GDP growth and the Term Spread. Meanwhile, the a12 coefficient = -0.875 implies that every one unit increase in the last quarter Term Spread will cause GDP growth this quarter to fall by .875 units.
Now Let’s look at the second equation. a21 = 0.009 implies that for every one unit increase in the last quarter GDP growth rate, Term Spread this quarter will rise by .009. Likewise, the a22 = 1.058 implies that a unit increase in the last quarter Term Spread will cause Term Spread this quarter to rise by 1.058 units.
However, since each coefficient in the VAR model only reflects a partial dynamic relationship and cannot capture a comprehensive dynamic relationship, we may need other tools such as Granger causality test, IRF impulse response function to help us understand the relationships.
# ---- Section: fit diagnostics and multi-step forecasts from the fitted VAR ----
# Obtain the adj. R^2 from the output of 'VAR()'
# VAR_fit$varresult holds the per-equation lm objects.
summary(VAR_fit$varresult$GDPGrowth)$adj.r.squared
## [1] 0.2886718
summary(VAR_fit$varresult$TSpread)$adj.r.squared
## [1] 0.824881
# Multi-step forecast
# 15 quarters ahead with 95% forecast intervals for both endogenous series.
predictions <- predict(VAR_fit,n.ahead = 15, ci = 0.95)
predictions
## $GDPGrowth
## fcst lower upper CI
## [1,] 1.198711 -3.546741 5.944163 4.745452
## [2,] 1.383492 -3.624828 6.391812 5.008320
## [3,] 1.734391 -3.469314 6.938096 5.203705
## [4,] 2.045169 -3.266459 7.356797 5.311628
## [5,] 2.304776 -3.077621 7.687172 5.382396
## [6,] 2.514830 -2.924988 7.954647 5.439817
## [7,] 2.674189 -2.814666 8.163044 5.488855
## [8,] 2.787256 -2.742916 8.317427 5.530171
## [9,] 2.860470 -2.702736 8.423677 5.563207
## [10,] 2.901204 -2.686752 8.489160 5.587956
## [11,] 2.916929 -2.688342 8.522199 5.605271
## [12,] 2.914597 -2.701976 8.531170 5.616573
## [13,] 2.900280 -2.723177 8.523737 5.623457
## [14,] 2.878988 -2.748391 8.506367 5.627379
## [15,] 2.854633 -2.774856 8.484122 5.629489
##
## $TSpread
## fcst lower upper CI
## [1,] 1.805240 0.83538762 2.775093 0.9698527
## [2,] 2.015903 0.60191951 3.429887 1.4139838
## [3,] 2.137420 0.47220986 3.802629 1.6652097
## [4,] 2.212731 0.37280391 4.052657 1.8399267
## [5,] 2.248926 0.29235971 4.205491 1.9565658
## [6,] 2.255636 0.22410116 4.287172 2.0315352
## [7,] 2.242139 0.16481529 4.319463 2.0773237
## [8,] 2.216036 0.11229197 4.319781 2.1037443
## [9,] 2.183447 0.06528336 4.301611 2.1181637
## [10,] 2.148992 0.02326201 4.274722 2.1257301
## [11,] 2.115927 -0.01381595 4.245670 2.1297428
## [12,] 2.086338 -0.04576626 4.218441 2.1321039
## [13,] 2.061363 -0.07240850 4.195135 2.1337716
## [14,] 2.041416 -0.09372608 4.176558 2.1351420
## [15,] 2.026388 -0.10993889 4.162715 2.1363268
# Plot each series' forecast fan separately
plot(predictions, names = "GDPGrowth")
plot(predictions, names = "TSpread")
# FIX: removed a stray "causality()" fragment that had been fused onto the line
# below (it made the script a syntax error). The actual Granger-causality calls
# appear later in this section.
rm(list=ls()) # clear workspace
cat("\014") # clear console
library(Quandl)
Quandl_key = read.csv("quandlkey.csv",stringsAsFactors=FALSE)$Key
Quandl.api_key(Quandl_key)
startd <-"1980-04-01"
endd <-"2012-12-31"
TB3MS <- Quandl("FRED/TB3MS",start_date=startd, end_date=endd,type="ts") # 3-month T-bill rate, monthly
TB10YS <- Quandl("FRED/GS10",start_date=startd, end_date=endd,type="ts") # 10-year Treasury yield, monthly
GDP <- Quandl("FRED/GDPC96",start_date=startd, end_date=endd,type="ts", transform="rdiff")# note this is simple return, we need log return since we want to run regression
TSpread <- TB10YS - TB3MS
TSpread <- aggregate(TSpread,nfrequency=4,FUN=mean)# aggregate monthly data to quarterly(averages)
GDPGrowth = 400*log(1+GDP)# annual log growth rate%
### VAR Model
# Re-estimate the lag-2 VAR, then run Granger-causality tests in both directions.
library(vars)
VAR_Data<-na.omit(ts.union(GDPGrowth, TSpread)) #set up data for estimation
VARselect(VAR_Data, lag.max = 12, type = "const")$selection #obtain the best lag period
## AIC(n) HQ(n) SC(n) FPE(n)
## 2 2 2 2
VAR_fit<-VAR(y = VAR_Data, p = 2)
summary(VAR_fit)
##
## VAR Estimation Results:
## =========================
## Endogenous variables: GDPGrowth, TSpread
## Deterministic variables: const
## Sample size: 128
## Log Likelihood: -381.122
## Roots of the characteristic polynomial:
## 0.8208 0.8208 0.1958 0.1958
## Call:
## VAR(y = VAR_Data, p = 2)
##
##
## Estimation results for equation GDPGrowth:
## ==========================================
## GDPGrowth = GDPGrowth.l1 + TSpread.l1 + GDPGrowth.l2 + TSpread.l2 + const
##
## Estimate Std. Error t value Pr(>|t|)
## GDPGrowth.l1 0.29512 0.08356 3.532 0.000582 ***
## TSpread.l1 -0.87548 0.36949 -2.369 0.019373 *
## GDPGrowth.l2 0.21663 0.08173 2.651 0.009089 **
## TSpread.l2 1.29865 0.37148 3.496 0.000658 ***
## const 0.48692 0.47198 1.032 0.304256
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
##
## Residual standard error: 2.421 on 123 degrees of freedom
## Multiple R-Squared: 0.3111, Adjusted R-squared: 0.2887
## F-statistic: 13.88 on 4 and 123 DF, p-value: 2.246e-09
##
##
## Estimation results for equation TSpread:
## ========================================
## TSpread = GDPGrowth.l1 + TSpread.l1 + GDPGrowth.l2 + TSpread.l2 + const
##
## Estimate Std. Error t value Pr(>|t|)
## GDPGrowth.l1 0.009302 0.017077 0.545 0.586944
## TSpread.l1 1.057695 0.075515 14.006 < 2e-16 ***
## GDPGrowth.l2 -0.056374 0.016703 -3.375 0.000988 ***
## TSpread.l2 -0.218659 0.075920 -2.880 0.004691 **
## const 0.454647 0.096461 4.713 6.48e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
##
## Residual standard error: 0.4948 on 123 degrees of freedom
## Multiple R-Squared: 0.8304, Adjusted R-squared: 0.8249
## F-statistic: 150.6 on 4 and 123 DF, p-value: < 2.2e-16
##
##
##
## Covariance matrix of residuals:
## GDPGrowth TSpread
## GDPGrowth 5.86218 0.05959
## TSpread 0.05959 0.24486
##
## Correlation matrix of residuals:
## GDPGrowth TSpread
## GDPGrowth 1.00000 0.04974
## TSpread 0.04974 1.00000
### Granger Causality Tests
# H0: GDPGrowth does not Granger-cause TSpread (GDPGrowth is not the cause variable) <=> H0: a21 = a23 = 0
causality(VAR_fit, cause = "GDPGrowth")$Granger
##
## Granger causality H0: GDPGrowth do not Granger-cause TSpread
##
## data: VAR object VAR_fit
## F-Test = 6.1452, df1 = 2, df2 = 246, p-value = 0.002487
# H0: TSpread does not Granger-cause GDPGrowth (TSpread is not the cause variable) <=> H0: a12 = a14 = 0
causality(VAR_fit, cause = "TSpread")$Granger
##
## Granger causality H0: TSpread do not Granger-cause GDPGrowth
##
## data: VAR object VAR_fit
## F-Test = 7.1708, df1 = 2, df2 = 246, p-value = 0.00094
# Interpret: both null hypotheses are rejected at conventional levels, so each variable helps predict the other.
# FIX: removed a stray "linearHypothesis()" fragment that had been fused onto
# the line below (syntax error); linearHypothesis() is called later in this
# section.
rm(list=ls()) # clear workspace
cat("\014") # clear console
library(Quandl)
Quandl_key = read.csv("quandlkey.csv",stringsAsFactors=FALSE)$Key
Quandl.api_key(Quandl_key)
startd <-"1980-04-01"
endd <-"2012-12-31"
TB3MS <- Quandl("FRED/TB3MS",start_date=startd, end_date=endd,type="ts")
TB10YS <- Quandl("FRED/GS10",start_date=startd, end_date=endd,type="ts")
GDP <- Quandl("FRED/GDPC96",start_date=startd, end_date=endd,type="ts", transform="rdiff")# note this is simple return, we need log return since we want to run regression
TSpread <- TB10YS - TB3MS
TSpread <- aggregate(TSpread,nfrequency=4,FUN=mean)# aggregate monthly data to quarterly(averages)
GDPGrowth = 400*log(1+GDP)# annual log growth rate%
### Model selection
# FIX: VARselect() lives in the vars package; this self-contained section only
# loads dynlm later, so make the dependency explicit. (rm(list=ls()) above
# clears objects but does not detach packages, which is why the original
# happened to work in a continuous interactive session.)
library(vars)
VAR_Data<-na.omit(ts.union(GDPGrowth, TSpread)) #set up data for estimation
VARselect(VAR_Data, lag.max = 12, type = "const")$selection #obtain the best lag period
## AIC(n) HQ(n) SC(n) FPE(n)
## 2 2 2 2
# Interpret: All the information criteria suggest using lags = 2 --> we need to set p=2 when estimating the model
### Estimate VAR equations separately by OLS
# Each VAR equation is just an OLS regression on two lags of both series, so we
# can also estimate them with dynlm() and use HAC/robust inference.
library(dynlm)
VAR_EQ1<-dynlm(GDPGrowth ~ L(GDPGrowth, 1:2) + L(TSpread, 1:2), start=c(1981,1), end=c(2012,4))
VAR_EQ2<-dynlm(TSpread ~ L(GDPGrowth, 1:2) + L(TSpread, 1:2), start=c(1981,1), end=c(2012,4))
# Rename regressors for better readability
names(VAR_EQ1$coefficients) <- c("Intercept","Growth_t-1", "Growth_t-2", "TSpread_t-1", "TSpread_t-2")
names(VAR_EQ2$coefficients) <- names(VAR_EQ1$coefficients)
# Obtain Robust Coefficient
library(lmtest)
library(sandwich)
coeftest(VAR_EQ1, vcov. = sandwich)
##
## t test of coefficients:
##
## Estimate Std. Error t value Pr(>|t|)
## Intercept 0.486924 0.523771 0.9297 0.354373
## Growth_t-1 0.295117 0.109974 2.6835 0.008288 **
## Growth_t-2 0.216634 0.086355 2.5086 0.013421 *
## TSpread_t-1 -0.875477 0.361088 -2.4246 0.016781 *
## TSpread_t-2 1.298647 0.395225 3.2858 0.001325 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
coeftest(VAR_EQ2, vcov. = sandwich)
##
## t test of coefficients:
##
## Estimate Std. Error t value Pr(>|t|)
## Intercept 0.4546467 0.1214656 3.7430 0.0002778 ***
## Growth_t-1 0.0093019 0.0218258 0.4262 0.6707140
## Growth_t-2 -0.0563737 0.0266037 -2.1190 0.0360996 *
## TSpread_t-1 1.0576951 0.0984832 10.7399 < 2.2e-16 ***
## TSpread_t-2 -0.2186588 0.1088367 -2.0091 0.0467207 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
### Granger causality tests
library(car)
# H0: X does not Granger Cause Y
# Joint test that both TSpread lags are zero in the GDP-growth equation.
linearHypothesis(VAR_EQ1,
hypothesis.matrix = c("TSpread_t-1", "TSpread_t-2"),
vcov. = sandwich)
## Linear hypothesis test
##
## Hypothesis:
## TSpread_t - 1 = 0
## TSpread_t - 2 = 0
##
## Model 1: restricted model
## Model 2: GDPGrowth ~ L(GDPGrowth, 1:2) + L(TSpread, 1:2)
##
## Note: Coefficient covariance matrix supplied.
##
## Res.Df Df F Pr(>F)
## 1 125
## 2 123 2 5.5884 0.004753 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Joint test that both GDP-growth lags are zero in the term-spread equation.
linearHypothesis(VAR_EQ2,
hypothesis.matrix = c("Growth_t-1", "Growth_t-2"),
vcov. = sandwich)
## Linear hypothesis test
##
## Hypothesis:
## Growth_t - 1 = 0
## Growth_t - 2 = 0
##
## Model 1: restricted model
## Model 2: TSpread ~ L(GDPGrowth, 1:2) + L(TSpread, 1:2)
##
## Note: Coefficient covariance matrix supplied.
##
## Res.Df Df F Pr(>F)
## 1 125
## 2 123 2 3.3739 0.03746 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Interpret: Both Granger causality tests reject at the level of 5%, this is evidence in favor of the conjecture that the term spread has power in explaining GDP growth and vice versa
# ---- Section: impulse response functions (IRFs) from the fitted VAR ----
rm(list=ls()) # clear workspace
cat("\014") # clear console
library(Quandl)
Quandl_key = read.csv("quandlkey.csv",stringsAsFactors=FALSE)$Key
Quandl.api_key(Quandl_key)
startd <-"1980-04-01"
endd <-"2012-12-31"
TB3MS <- Quandl("FRED/TB3MS",start_date=startd, end_date=endd,type="ts")
TB10YS <- Quandl("FRED/GS10",start_date=startd, end_date=endd,type="ts")
GDP <- Quandl("FRED/GDPC96",start_date=startd, end_date=endd,type="ts", transform="rdiff")# note this is simple return, we need log return since we want to run regression
TSpread <- TB10YS - TB3MS
TSpread <- aggregate(TSpread,nfrequency=4,FUN=mean)# aggregate monthly data to quarterly(averages)
GDPGrowth = 400*log(1+GDP)# annual log growth rate%
### VAR Model
library(vars)
VAR_Data<-na.omit(ts.union(GDPGrowth, TSpread)) #set up data for estimation
VARselect(VAR_Data, lag.max = 12, type = "const")$selection #obtain the best lag period
## AIC(n) HQ(n) SC(n) FPE(n)
## 2 2 2 2
VAR_fit<-VAR(y = VAR_Data, p = 2)
summary(VAR_fit)
##
## VAR Estimation Results:
## =========================
## Endogenous variables: GDPGrowth, TSpread
## Deterministic variables: const
## Sample size: 128
## Log Likelihood: -381.122
## Roots of the characteristic polynomial:
## 0.8208 0.8208 0.1958 0.1958
## Call:
## VAR(y = VAR_Data, p = 2)
##
##
## Estimation results for equation GDPGrowth:
## ==========================================
## GDPGrowth = GDPGrowth.l1 + TSpread.l1 + GDPGrowth.l2 + TSpread.l2 + const
##
## Estimate Std. Error t value Pr(>|t|)
## GDPGrowth.l1 0.29512 0.08356 3.532 0.000582 ***
## TSpread.l1 -0.87548 0.36949 -2.369 0.019373 *
## GDPGrowth.l2 0.21663 0.08173 2.651 0.009089 **
## TSpread.l2 1.29865 0.37148 3.496 0.000658 ***
## const 0.48692 0.47198 1.032 0.304256
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
##
## Residual standard error: 2.421 on 123 degrees of freedom
## Multiple R-Squared: 0.3111, Adjusted R-squared: 0.2887
## F-statistic: 13.88 on 4 and 123 DF, p-value: 2.246e-09
##
##
## Estimation results for equation TSpread:
## ========================================
## TSpread = GDPGrowth.l1 + TSpread.l1 + GDPGrowth.l2 + TSpread.l2 + const
##
## Estimate Std. Error t value Pr(>|t|)
## GDPGrowth.l1 0.009302 0.017077 0.545 0.586944
## TSpread.l1 1.057695 0.075515 14.006 < 2e-16 ***
## GDPGrowth.l2 -0.056374 0.016703 -3.375 0.000988 ***
## TSpread.l2 -0.218659 0.075920 -2.880 0.004691 **
## const 0.454647 0.096461 4.713 6.48e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
##
## Residual standard error: 0.4948 on 123 degrees of freedom
## Multiple R-Squared: 0.8304, Adjusted R-squared: 0.8249
## F-statistic: 150.6 on 4 and 123 DF, p-value: < 2.2e-16
##
##
##
## Covariance matrix of residuals:
## GDPGrowth TSpread
## GDPGrowth 5.86218 0.05959
## TSpread 0.05959 0.24486
##
## Correlation matrix of residuals:
## GDPGrowth TSpread
## GDPGrowth 1.00000 0.04974
## TSpread 0.04974 1.00000
### IRF
# Consider the response of GDP growth to term spread shock
# boot = TRUE adds bootstrapped confidence bands around the response path.
IRF_GDP<- irf(VAR_fit, impulse = "TSpread", response = "GDPGrowth", n.ahead = 20, boot = TRUE)
plot(IRF_GDP, ylab = "GDP growth", main = "Shock from Term spread")
#Interpret: The IRF estimates the effects from one unit shock to the error in the TSpread on future value of GDPGrowth. For example, IRF_GDP[21]=-0.025 implies that the influence comes from one unit shock to the error in the TSpread will cause 20 step-ahead GDPGrowth decrease by 0.025. Note that a positive shock to Term spread has an immediate negative impact on GDP growth, with growth falling to -0.432. By about the 5qtr growth turns positive again and by the 20th quarter the impact of the shock is largely dissipated.
# Consider the response of term spread to GDP growth shock
IRF_TSpread <- irf(VAR_fit, impulse="GDPGrowth", response="TSpread", n.ahead = 20, boot = TRUE)
plot(IRF_TSpread, ylab = "Term spread", main = "Shock from GDP growth")
### Housekeeping
rm(list=ls()) # clear workspace
cat("\014") # clear console
# prepare library
library(rvest) # crawl data from html
library(Quandl)
library(quantmod)
library(PerformanceAnalytics)
library(here)
library("readxl")
library(tidyverse)
library(data.table)
library(plyr)
library(ggplot2)
# fetch DOWJIA ticker list from wiki
url <- "https://en.wikipedia.org/wiki/Dow_Jones_Industrial_Average"
DOWJIA <- url %>%
xml2::read_html() %>%
html_nodes(xpath='//*[@id="constituents"]') %>%
html_table()
DOWJIA <- DOWJIA[[1]]
DOW_Tickers <- DOWJIA$Symbol
### load data (Dow 30 Constituents)
Tickers<-c("^GSPC",DOW_Tickers)
Rf_Tickers<-'DTB4WK'
startd = "2018-12-01"
endd = "2021-04-15"
# pull stock price from yahoo
getSymbols(Tickers,from=startd,to=endd,src='yahoo')
## [1] "^GSPC" "MMM" "AXP" "AMGN" "AAPL" "BA" "CAT" "CVX" "CSCO"
## [10] "KO" "DIS" "DOW" "GS" "HD" "HON" "IBM" "INTC" "JNJ"
## [19] "JPM" "MCD" "MRK" "MSFT" "NKE" "PG" "CRM" "TRV" "UNH"
## [28] "VZ" "V" "WBA" "WMT"
getSymbols(Rf_Tickers,src='FRED')
## [1] "DTB4WK"
### stock Return
# Strip punctuation from tickers ("^GSPC" -> "GSPC") so they match the
# object names getSymbols() created in the workspace.
tickers = gsub("[[:punct:]]", "", Tickers)
# Daily arithmetic returns on adjusted close, merged into one xts;
# drop the first row (first observation has no return).
Return = do.call(merge,lapply(tickers, function(x)
  periodReturn(Ad(get(x)),period='daily',type='arithmetic')))
Return = Return[-1,]
names(Return) = tickers
### Rf
DTB4WK = na.omit(DTB4WK)
Rf = DTB4WK[paste(startd,"/",endd,sep="")] # annual rate (percent)
Rf = (Rf/100+1)^(1/252)-1 # convert to daily rate, 252 business days
names(Rf) = "Rf"
### merge data
Data = merge(Return,Rf)
Data = na.omit(Data)
### excess return
NumCol = ncol(Data)
# BUG FIX: the original "Data[,1:NumCol-1]" parses as (1:NumCol)-1, i.e.
# 0:(NumCol-1); it only worked because R silently drops a 0 index.
# Make the intended "all columns except the last (Rf)" explicit.
StocksEXRet = Data[,1:(NumCol-1)]
# Subtract the daily risk-free rate from every return column
for (i in 1:ncol(StocksEXRet)){
  StocksEXRet[,i]<- StocksEXRet[,i]-Data$Rf
}
Rf = Data$Rf
### log return
LogStocksEXRet = log(1+StocksEXRet)
LogRf = log(1+Rf)
# print data
head(cbind(LogStocksEXRet[,1:4],LogRf))
## GSPC MMM AXP AMGN Rf
## 2019-03-20 -0.0030434855 -0.0035455972 -0.017103361 -0.0020288662 9.450071e-05
## 2019-03-21 0.0106986043 0.0061733709 0.009339165 0.0039246867 9.643767e-05
## 2019-03-22 -0.0192543499 -0.0239964108 -0.021429188 -0.0275174206 9.566300e-05
## 2019-03-25 -0.0009343499 -0.0072058276 -0.003939614 -0.0006842911 9.488817e-05
## 2019-03-26 0.0070628327 0.0195445844 0.004115378 0.0088691184 9.488817e-05
## 2019-03-27 -0.0047500940 -0.0004807677 -0.004855138 -0.0105066826 9.450071e-05
# set parameters
event_date = as.Date("2020-01-06")
event_window = 10       # days on either side of the event date
estimate_window = 250   # trading days used to estimate the market model
postevent_window = 30   # extra days after the event window (for plotting)
T1 = event_date - event_window   # event window start
T2 = event_date + event_window   # event window end
T0 = T1 - estimate_window        # estimation window start
T3 = T2 + postevent_window       # end of post-event sample
### fit CAPM model
# estimation sample: from T0 up to the day before the event window opens
Estimate_Data = LogStocksEXRet[paste(T0,"/",T1-1,sep="")]
# CAPM regression: regress each stock (columns 2..N) on the market (column 1).
# REFACTOR: the original ran the first regression outside the loop and grew
# Coeff with rbind() inside it; build one (alpha, beta) row per stock with
# lapply and bind them once instead.
NumCols = ncol(Estimate_Data)
coef_rows <- lapply(2:NumCols, function(i)
  coef(lm(Estimate_Data[,i] ~ Estimate_Data[,1])))
Coeff <- do.call(rbind, coef_rows)
# save betas for all tickers
Tickers<-DOW_Tickers
Coeff<-data.frame(Coeff,Tickers)
head(Coeff)
## (output from the original run; row/column labels may differ slightly,
## but Coeff[, 1] is still the intercept and Coeff[, 2] the beta)
## X.Intercept. Estimate_Data...1. Tickers
## model.coefficients -1.824561e-03 1.1560208 MMM
## model.coefficients.1 -7.993798e-05 1.1140154 AXP
## model.coefficients.2 1.449461e-03 0.6143389 AMGN
## model.coefficients.3 1.142455e-03 1.4775025 AAPL
## model.coefficients.4 -1.518523e-03 0.8504409 BA
## model.coefficients.5 -5.245564e-04 1.2804610 CAT
### predict "normal" return
Test_data = LogStocksEXRet[paste(T1,"/",T3,sep="")]
Prediction = Test_data[,-1]
# Predicted ("normal") excess return for stock i: alpha_i + beta_i * market
for(i in 1:ncol(Prediction)){
  Prediction[,i] = Coeff[i,1]+Coeff[i,2]*Test_data[,1]
}
### abnormal return
# AR = realized log excess return minus the CAPM-predicted "normal" return
AR = Test_data[,-1]
for(i in 1:ncol(AR)){
  AR[,i] = Test_data[,i+1]-Prediction[,i]
}
### Cumulative AR (zoo's cumsum method accumulates each column separately)
CAR = cumsum(AR)
# GENERALIZED: number of stocks, instead of the hard-coded 30 the original
# used in melt() and in the J-stat denominator below.
NumStocks = ncol(CAR)
### convert to long table
CAR_df = data.frame(CAR)
CAR_df$Date = index(CAR)
CAR_df=melt(setDT(CAR_df), measure.vars=list(seq_len(NumStocks)),
variable.name='Ticker', value.name=c("CAR"))[,
Ticker:= DOW_Tickers[Ticker]][order(Date)]
# average CAR across all stocks, per date
AvgCAR <- ddply(CAR_df, "Date", summarise, Ticker = "AVERAGE",AvgCAR=mean(CAR))
### plot
ggplot(CAR_df, aes(x=Date, y=CAR, group=Ticker, color=Ticker)) +
geom_line(size=0.8,alpha=0.8)+
geom_line(data = AvgCAR, aes(x=Date,y=AvgCAR),size = 1.2, color = "black")+
geom_vline(aes(xintercept = event_date),linetype="dashed",color = "darkred",size=1.5)+
geom_hline(aes(yintercept = 0),linetype="dashed",color = "darkred",size=1.1)+
annotate(geom="text", x=event_date+10, y=0.4, label="Jan6th",fontface =2,
size =8,alpha = 0.8,color="darkred")+
scale_x_date(date_breaks = "5 days", date_labels = "%b %d")+
ggtitle("Cumulative Abnormal Log Excess Return")+ylab("CAR")
# Per-ticker CAR at the end of the event window
# NOTE(review): CAR[T2] returns zero rows if T2 is not a trading day — confirm
d<-data.frame(CAR=as.numeric(t(CAR[T2])))
d$Ticker = DOW_Tickers
titletoplot = paste0("CAR ",event_window," days after Jan6th")
ggplot(data = d,aes(x = Ticker, y=CAR,fill=Ticker))+
geom_bar(stat="identity",alpha=0.5)+
theme(axis.text.x = element_text(angle = 45, hjust = 1))+
labs(y="CAR",x='Ticker',title = titletoplot)
### calculate J-stat
# J = average CAR over the event window divided by its standard error,
# sqrt(sum_i var(AR_i) / N^2), testing H0: average CAR = 0.
Avg_event_CAR = rowMeans(CAR[paste(T1,"/",T2,sep="")])
JCAR = tail(Avg_event_CAR,1)
event_AR = AR[paste(T1,"/",T2,sep="")]
Jsigma = sqrt(sum(sapply(event_AR,FUN=var))/NumStocks^2) # was hard-coded /30^2
Jstat = JCAR/Jsigma
pvalues = pnorm(q=abs(Jstat),lower.tail = FALSE)*2
# print result
print(cbind(CAR = JCAR, Jstat,Pvalue = pvalues))
## CAR Jstat Pvalue
## [1,] -0.0046184 -3.13541 0.001716142
Use a logistic to model recessions with 10yr-2yr Treasury Spread. Note: This is a simplified version of the typical models used.
rm(list=ls()) # clear workspace
cat("\014") # clear console
library(fredr)
library(xts)

# FRED API key is stored outside the script
fred_api_key <- read.csv("../data/fredkey.csv", stringsAsFactors = FALSE)
fredr_set_key(fred_api_key$Key)

# Both series are pulled monthly over the same window, so rows align
fetch_monthly <- function(id) {
  fredr(
    series_id = id,
    frequency = "m", # monthly
    observation_start = as.Date("1977-01-01"),
    observation_end = as.Date("2022-01-01"),
    units = "lin"
  )
}
TenTwo    <- fetch_monthly("T10Y2Y") # 10yr-2yr Treasury spread
Recession <- fetch_monthly("USREC")  # NBER recession indicator (0/1)

# Assemble an xts with the spread lagged 4 months as the predictor
MyData <- data.frame(TenTwo = TenTwo$value, Recession = Recession$value,
                     row.names = Recession$date)
MyData <- as.xts(MyData)
MyData$TenTwoLag4 <- lag.xts(MyData$TenTwo, k = 4)
MyData <- na.omit(MyData)

# Logistic regression: P(recession_t) = Lambda(a + b * spread_{t-4})
logit1 <- glm(Recession ~ TenTwoLag4, data = MyData, family = 'binomial')
summary(logit1)
##
## Call:
## glm(formula = Recession ~ TenTwoLag4, family = "binomial", data = MyData)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -0.9476 -0.5342 -0.4136 -0.2796 2.6292
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -1.5604 0.1633 -9.557 < 2e-16 ***
## TenTwoLag4 -0.8068 0.1715 -4.704 2.55e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 367.66 on 536 degrees of freedom
## Residual deviance: 342.96 on 535 degrees of freedom
## AIC: 346.96
##
## Number of Fisher Scoring iterations: 5
Reference: https://sebastiansauer.github.io/convert_logit2prob/
# Fit the recession logit again, so the coefficients can be mapped to the
# probability scale with logit2prob() below.
rm(list=ls()) # clear workspace
cat("\014") # clear console
library(fredr)
fred_api_key<-read.csv("../data/fredkey.csv",stringsAsFactors=FALSE)
fredr_set_key(fred_api_key$Key)
# 10yr-2yr Treasury constant-maturity spread, monthly
TenTwo<-fredr(
series_id = "T10Y2Y",
frequency = "m", # monthly
observation_start = as.Date("1977-01-01"),
observation_end = as.Date("2022-01-01"),
units = "lin"
)
# NBER recession indicator (1 = recession month)
Recession<-fredr(
series_id = "USREC",
frequency = "m", # monthly
observation_start = as.Date("1977-01-01"),
observation_end = as.Date("2022-01-01"),
units = "lin"
)
# Both series share the same monthly index, so rows align one-for-one
MyData<-data.frame(TenTwo=TenTwo$value,Recession=Recession$value,row.names = Recession$date)
library(xts)
MyData<-as.xts(MyData)
# Predictor: the spread observed 4 months earlier
MyData$TenTwoLag4 <- lag.xts(MyData$TenTwo,k=4)
MyData<-na.omit(MyData) # drops the first 4 rows (NA lags)
logit1<-glm(Recession~TenTwoLag4,data=MyData,family = 'binomial')
# Convert log-odds (logit scale) to a probability.
#
# Uses the logistic CDF via plogis(), which is numerically stable: the
# original odds / (1 + odds) form overflows for large positive `logit`
# (exp(logit) -> Inf, yielding NaN instead of 1).
#
# @param logit numeric vector of log-odds
# @return numeric vector of probabilities in [0, 1], same length as `logit`
logit2prob <- function(logit){
  plogis(logit)
}
# Transform the fitted log-odds coefficients to the probability scale
logit2prob(coef(logit1))
## (Intercept) TenTwoLag4
## 0.1735834 0.3085671
Here the transformed TenTwoLag4 value of roughly 0.31 is read as a 31% probability.
Use a logistic to model recessions with 10yr-2yr Treasury Spread. Note: This is a simplified version of the typical models used.
rm(list=ls()) # clear workspace
cat("\014") # clear console
library(fredr)
library(xts)
fred_api_key<-read.csv("../data/fredkey.csv",stringsAsFactors=FALSE)
fredr_set_key(fred_api_key$Key)
# 10yr-2yr Treasury spread, monthly
TenTwo<-fredr(
series_id = "T10Y2Y",
frequency = "m", # monthly
observation_start = as.Date("1977-01-01"),
observation_end = as.Date("2022-01-01"),
units = "lin"
)
# NBER recession indicator (0/1), monthly
Recession<-fredr(
series_id = "USREC",
frequency = "m", # monthly
observation_start = as.Date("1977-01-01"),
observation_end = as.Date("2022-01-01"),
units = "lin"
)
MyData<-data.frame(TenTwo=TenTwo$value,Recession=Recession$value,row.names = Recession$date)
MyData<-as.xts(MyData)
MyData$TenTwoLag4 <- lag.xts(MyData$TenTwo,k=4)
MyData<-na.omit(MyData)
# Logit: P(recession_t) = Lambda(a + b * spread_{t-4})
logit1<-glm(Recession~TenTwoLag4,data=MyData,family = 'binomial')
newdata<-data.frame(TenTwoLag4=tail(TenTwo$value,n=1)) #last observed value of 10-2yr
PRED<-predict(logit1, newdata = newdata, type = 'response')
coeffs<-coef(logit1)
TenTwoMean=mean(TenTwo$value)
# BUG FIX: the marginal effect at the mean is b * Lambda'(a + b*xbar),
# i.e. the logistic *density* evaluated at the fitted linear predictor.
# The original evaluated the density at the raw mean spread (omitting
# intercept and slope), which is not the model's marginal effect.
LinPredAtMean = coeffs[1] + coeffs[2]*TenTwoMean
MgrlEffectTenTwo = dlogis(LinPredAtMean)*coeffs[2]
# The result is a change in probability (e.g. -0.13 means -13 percentage
# points), so the original's trailing ' %' label is dropped as misleading.
print(paste('For every 1 percentage point (i.e. 100bps) increase in the 10-2yr spread, the probability of a recession changes by ', MgrlEffectTenTwo))
## (output below is from the original, pre-fix run)
## [1] "For every 1 percentage point (i.e. 100bps) increase in the 10-2yr spread, the probability of a recession changes by -0.163825499802387 %"
Use a logistic regression to model recessions with the 10yr-2yr Treasury spread.
rm(list=ls()) # clear workspace
cat("\014") # clear console
library(fredr)
fred_api_key<-read.csv("../data/fredkey.csv",stringsAsFactors=FALSE)
fredr_set_key(fred_api_key$Key)
# 10yr-2yr Treasury spread, monthly
TenTwo<-fredr(
series_id = "T10Y2Y",
frequency = "m", # monthly
observation_start = as.Date("1977-01-01"),
observation_end = as.Date("2022-01-01"),
units = "lin"
)
# NBER recession indicator (0/1), monthly
Recession<-fredr(
series_id = "USREC",
frequency = "m", # monthly
observation_start = as.Date("1977-01-01"),
observation_end = as.Date("2022-01-01"),
units = "lin"
)
MyData<-data.frame(TenTwo=TenTwo$value,Recession=Recession$value,row.names = Recession$date)
library(xts)
MyData<-as.xts(MyData)
# Predictor: spread lagged 4 months
MyData$TenTwoLag4 <- lag.xts(MyData$TenTwo,k=4)
MyData<-na.omit(MyData)
logit1<-glm(Recession~TenTwoLag4,data=MyData,family = 'binomial')
# In-sample fitted probabilities, classified with a 0.3 cutoff
# (below 0.5 — presumably chosen because recession months are rare, so a
# lower threshold catches more of them; confirm against the use case)
Pred<-predict(logit1,MyData,type="response")
cutoff = .3
Pred2<-ifelse(Pred>=cutoff,1,0)
library(caret)
# confusionMatrix() requires factors; with levels c("0","1"), caret takes
# the first level "0" (no recession) as the 'positive' class — see output
Actual<-factor(MyData$Recession)
Predicted<-factor(Pred2)
C<-confusionMatrix(data=Predicted,reference=Actual)
C
## Confusion Matrix and Statistics
##
## Reference
## Prediction 0 1
## 0 477 48
## 1 2 10
##
## Accuracy : 0.9069
## 95% CI : (0.8791, 0.9301)
## No Information Rate : 0.892
## P-Value [Acc > NIR] : 0.148
##
## Kappa : 0.2582
##
## Mcnemar's Test P-Value : 1.966e-10
##
## Sensitivity : 0.9958
## Specificity : 0.1724
## Pos Pred Value : 0.9086
## Neg Pred Value : 0.8333
## Prevalence : 0.8920
## Detection Rate : 0.8883
## Detection Prevalence : 0.9777
## Balanced Accuracy : 0.5841
##
## 'Positive' Class : 0
##
Note: Accuracy = (477+10)/537 (i.e., the % of results correctly classified); Sensitivity = 477/(477+2) (i.e., the % of actual 0's correctly classified); Specificity = 10/(48+10) (i.e., the % of actual 1's correctly classified).
Use a logistic to model recessions with 10yr-2yr Treasury Spread. Note: This is a simplified version of the typical models used.
rm(list=ls()) # clear workspace
cat("\014") # clear console
library(fredr)
library(xts)
fred_api_key <- read.csv("../data/fredkey.csv", stringsAsFactors = FALSE)
fredr_set_key(fred_api_key$Key)

# 10yr-2yr Treasury spread and NBER recession dummy, monthly and aligned
TenTwo <- fredr(
series_id = "T10Y2Y",
frequency = "m", # monthly
observation_start = as.Date("1977-01-01"),
observation_end = as.Date("2022-01-01"),
units = "lin"
)
Recession <- fredr(
series_id = "USREC",
frequency = "m", # monthly
observation_start = as.Date("1977-01-01"),
observation_end = as.Date("2022-01-01"),
units = "lin"
)

# xts with the spread lagged 4 months as the regressor
MyData <- as.xts(data.frame(TenTwo = TenTwo$value,
                            Recession = Recession$value,
                            row.names = Recession$date))
MyData$TenTwoLag4 <- lag.xts(MyData$TenTwo, k = 4)
MyData <- na.omit(MyData)

# Fit the logit, then predict at the most recent observed spread
logit1 <- glm(Recession ~ TenTwoLag4, data = MyData, family = 'binomial')
newdata <- data.frame(TenTwoLag4 = tail(TenTwo$value, n = 1)) #last observed value of 10-2yr
PRED <- predict(logit1, newdata = newdata, type = 'response')

# Reproduce the prediction by hand: p = 1 / (1 + exp(-(a + b*x)))
coeffs <- coef(logit1)
lin_pred <- coeffs[1] + coeffs[2] * newdata$TenTwoLag4
ByHand <- 1/(1+exp(-(lin_pred)))
print(paste('Predicted Probability of Recession given 10-2yr = ', as.numeric(newdata[1])))
## [1] "Predicted Probability of Recession given 10-2yr = 0.78"
print(paste('Via predict ',PRED,' | By Hand ',ByHand))
## [1] "Via predict 0.100673319918696 | By Hand 0.100673319918696"
Use a logistic to model recessions with 10yr-2yr Treasury Spread. Note: This is a simplified version of the typical implementation. Recession in Next 3 mths = f(10-2yr Treasury Spread @ time t)
rm(list=ls()) # clear workspace
cat("\014") # clear console
library(fredr)
library(xts)
fred_api_key<-read.csv("../data/fredkey.csv",stringsAsFactors=FALSE)
fredr_set_key(fred_api_key$Key)
# 10yr-2yr Treasury spread, monthly
TenTwo<-fredr(
series_id = "T10Y2Y",
frequency = "m", # monthly
observation_start = as.Date("1977-01-01"),
observation_end = as.Date("2022-01-01"),
units = "lin"
)
# NBER recession indicator (0/1), monthly
Recession<-fredr(
series_id = "USREC",
frequency = "m", # monthly
observation_start = as.Date("1977-01-01"),
observation_end = as.Date("2022-01-01"),
units = "lin"
)
MyData<-data.frame(TenTwo=TenTwo$value,Recession=Recession$value,row.names = Recession$date)
MyData<-as.xts(MyData)
# Negative k in lag.xts() produces LEADS: three columns holding the
# recession dummy 1, 2 and 3 months AHEAD of each row's date
USRECLEADS<-lag.xts(MyData$Recession,k=-3:-1)
# RowSum >= 1 <=> at least one of the next 3 months is a recession month
USRECLEADS$RowSum<-rowSums(USRECLEADS)
USRECLEADS$RecNext3Mths<-ifelse(USRECLEADS$RowSum>=1,1,0)
MyData$RecNext3Mths<-USRECLEADS$RecNext3Mths
# Rows with NA leads (the last 3 months, which have no future data) drop here
MyData<-na.omit(MyData)
# Logit of "recession within the next 3 months" on the CURRENT spread
logit1<-glm(RecNext3Mths~TenTwo,data=MyData,family = 'binomial')
summary(logit1)
##
## Call:
## glm(formula = RecNext3Mths ~ TenTwo, family = "binomial", data = MyData)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -0.8367 -0.5824 -0.4896 -0.3788 2.3567
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -1.5011 0.1603 -9.364 < 2e-16 ***
## TenTwo -0.5134 0.1489 -3.448 0.000565 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 415.98 on 537 degrees of freedom
## Residual deviance: 403.50 on 536 degrees of freedom
## AIC: 407.5
##
## Number of Fisher Scoring iterations: 5